mirror of
https://github.com/stalwartlabs/mail-server.git
synced 2025-09-11 14:34:16 +08:00
Port Spam filter to Rust - part 3
This commit is contained in:
parent
4453dc8f3d
commit
f0d84c8e68
34 changed files with 1791 additions and 653 deletions
5
Cargo.lock
generated
5
Cargo.lock
generated
|
@ -6447,16 +6447,21 @@ name = "spam-filter"
|
|||
version = "0.10.7"
|
||||
dependencies = [
|
||||
"common",
|
||||
"decancer",
|
||||
"hyper 1.5.1",
|
||||
"idna 1.0.3",
|
||||
"mail-auth",
|
||||
"mail-builder",
|
||||
"mail-parser",
|
||||
"mail-send",
|
||||
"nlp",
|
||||
"psl",
|
||||
"reqwest 0.12.9",
|
||||
"smtp-proto",
|
||||
"store",
|
||||
"tokio",
|
||||
"trc",
|
||||
"unicode-security",
|
||||
"utils",
|
||||
]
|
||||
|
||||
|
|
|
@ -4,14 +4,65 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use utils::{config::Config, glob::GlobSet};
|
||||
|
||||
use super::{if_block::IfBlock, Expression};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct SpamFilterConfig {
|
||||
pub list_dmarc_allow: GlobSet,
|
||||
pub list_spf_dkim_allow: GlobSet,
|
||||
pub list_freemail_providers: GlobSet,
|
||||
pub list_disposable_providers: GlobSet,
|
||||
pub list_trusted_domains: GlobSet,
|
||||
pub list_url_redirectors: GlobSet,
|
||||
pub remote_lists: Vec<RemoteListConfig>,
|
||||
pub dnsbls: Vec<DnsblConfig>,
|
||||
}
|
||||
|
||||
/// Kind of value a remote list or DNSBL is applied to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Target {
    /// URLs.
    Url,
    /// Domain names.
    Domain,
    /// E-mail addresses.
    Email,
    /// IP addresses (no family specified).
    Ip,
    /// IPv4 addresses.
    Ipv4,
    /// IPv6 addresses.
    Ipv6,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RemoteListConfig {
|
||||
pub id: String,
|
||||
pub url: String,
|
||||
pub retry: Duration, // 1 hour
|
||||
pub refresh: Duration, // 12h openphish, 6h phishtank
|
||||
pub timeout: Duration, // 10s
|
||||
pub max_size: usize, // 10MB
|
||||
pub max_entries: usize, // 100000
|
||||
pub max_entry_size: usize, // 256
|
||||
pub format: RemoteListFormat,
|
||||
pub target: Target,
|
||||
pub tag: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DnsblConfig {
|
||||
pub id: String,
|
||||
pub zone: Expression,
|
||||
pub target: Target,
|
||||
pub tags: IfBlock,
|
||||
}
|
||||
|
||||
/// Layout of a downloaded remote list.
#[derive(Debug, Clone)]
pub enum RemoteListFormat {
    /// Plain list.
    List,
    /// Delimited text.
    Csv {
        /// Column holding the entry.
        column: u32,
        /// Field separator character.
        separator: char,
        /// Whether the first line is skipped.
        skip_first: bool,
    },
}
|
||||
|
||||
impl SpamFilterConfig {
|
||||
|
|
|
@ -680,3 +680,21 @@ impl<'x> TryFrom<Variable<'x>> for StatusCode {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'x> ResolveVariable for &'x str {
|
||||
fn resolve_variable(&self, variable: u32) -> Variable<'x> {
|
||||
match variable {
|
||||
0 => Variable::String((*self).into()),
|
||||
_ => Variable::Integer(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ResolveVariable for Vec<String> {
|
||||
fn resolve_variable(&self, variable: u32) -> Variable<'_> {
|
||||
match variable {
|
||||
0 => Variable::Array(self.iter().map(|v| Variable::String(v.into())).collect()),
|
||||
_ => Variable::Integer(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
mod array;
|
||||
pub mod array;
|
||||
mod email;
|
||||
mod header;
|
||||
pub mod html;
|
||||
mod image;
|
||||
mod misc;
|
||||
pub mod image;
|
||||
pub mod misc;
|
||||
pub mod text;
|
||||
mod unicode;
|
||||
mod url;
|
||||
pub mod unicode;
|
||||
pub mod url;
|
||||
|
||||
use sieve::{runtime::Variable, FunctionMap};
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ pub fn fn_has_obscured<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
|||
.into()
|
||||
}
|
||||
|
||||
trait CharUtils {
|
||||
pub trait CharUtils {
|
||||
fn is_zwsp(&self) -> bool;
|
||||
fn is_obscured(&self) -> bool;
|
||||
}
|
||||
|
|
|
@ -4,19 +4,10 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
io::{BufRead, BufReader},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use mail_auth::flate2;
|
||||
use sieve::{runtime::Variable, FunctionMap};
|
||||
use store::{Deserialize, Value};
|
||||
|
||||
use crate::{
|
||||
config::scripts::RemoteList, scripts::into_sieve_value, HttpLimitResponse, USER_AGENT,
|
||||
};
|
||||
use crate::scripts::into_sieve_value;
|
||||
|
||||
use super::PluginContext;
|
||||
|
||||
|
@ -32,10 +23,6 @@ pub fn register_set(plugin_id: u32, fnc_map: &mut FunctionMap) {
|
|||
fnc_map.set_external_function("key_set", plugin_id, 4);
|
||||
}
|
||||
|
||||
pub fn register_remote(plugin_id: u32, fnc_map: &mut FunctionMap) {
|
||||
fnc_map.set_external_function("key_exists_http", plugin_id, 3);
|
||||
}
|
||||
|
||||
pub fn register_local_domain(plugin_id: u32, fnc_map: &mut FunctionMap) {
|
||||
fnc_map.set_external_function("is_local_domain", plugin_id, 2);
|
||||
}
|
||||
|
@ -118,242 +105,6 @@ pub async fn exec_set(ctx: PluginContext<'_>) -> trc::Result<Variable> {
|
|||
.map(|_| true.into())
|
||||
}
|
||||
|
||||
pub async fn exec_remote(ctx: PluginContext<'_>) -> trc::Result<Variable> {
|
||||
match exec_remote_(&ctx).await {
|
||||
Ok(result) => Ok(result),
|
||||
Err(err) => {
|
||||
// Something went wrong, try again in one hour
|
||||
const RETRY: Duration = Duration::from_secs(3600);
|
||||
|
||||
let mut _lock = ctx.server.inner.data.remote_lists.write();
|
||||
let list = _lock
|
||||
.entry(ctx.arguments[0].to_string().to_string())
|
||||
.or_insert_with(|| RemoteList {
|
||||
entries: HashSet::new(),
|
||||
expires: Instant::now(),
|
||||
});
|
||||
|
||||
if list.expires > Instant::now() {
|
||||
Ok(list
|
||||
.entries
|
||||
.contains(ctx.arguments[1].to_string().as_ref())
|
||||
.into())
|
||||
} else {
|
||||
list.expires = Instant::now() + RETRY;
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_RESOURCE_SIZE: usize = 10 * 1024 * 1024;
|
||||
|
||||
async fn exec_remote_(ctx: &PluginContext<'_>) -> trc::Result<Variable> {
|
||||
let resource = ctx.arguments[0].to_string();
|
||||
let item = ctx.arguments[1].to_string();
|
||||
|
||||
#[cfg(feature = "test_mode")]
|
||||
{
|
||||
if (resource.contains("open") && item.contains("open"))
|
||||
|| (resource.contains("tank") && item.contains("tank"))
|
||||
{
|
||||
return Ok(true.into());
|
||||
}
|
||||
}
|
||||
|
||||
if resource.is_empty() || item.is_empty() {
|
||||
return Ok(false.into());
|
||||
}
|
||||
|
||||
const TIMEOUT: Duration = Duration::from_secs(45);
|
||||
const MAX_ENTRY_SIZE: usize = 256;
|
||||
const MAX_ENTRIES: usize = 100000;
|
||||
|
||||
match ctx
|
||||
.server
|
||||
.inner
|
||||
.data
|
||||
.remote_lists
|
||||
.read()
|
||||
.get(resource.as_ref())
|
||||
{
|
||||
Some(remote_list) if remote_list.expires < Instant::now() => {
|
||||
return Ok(remote_list.entries.contains(item.as_ref()).into())
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
enum Format {
|
||||
List,
|
||||
Csv {
|
||||
column: u32,
|
||||
separator: char,
|
||||
skip_first: bool,
|
||||
},
|
||||
}
|
||||
|
||||
// Obtain parameters
|
||||
let mut format = Format::List;
|
||||
let mut expires = Duration::from_secs(12 * 3600);
|
||||
|
||||
if let Some(arr) = ctx.arguments[2].as_array() {
|
||||
// Obtain expiration
|
||||
match arr.first() {
|
||||
Some(Variable::Integer(v)) if *v > 0 => {
|
||||
expires = Duration::from_secs(*v as u64);
|
||||
}
|
||||
Some(Variable::Float(v)) if *v > 0.0 => {
|
||||
expires = Duration::from_secs(*v as u64);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// Obtain list type
|
||||
if matches!(arr.get(1), Some(Variable::String(list_type)) if list_type.eq_ignore_ascii_case("csv"))
|
||||
{
|
||||
format = Format::Csv {
|
||||
column: arr.get(2).map(|v| v.to_integer()).unwrap_or_default() as u32,
|
||||
separator: arr
|
||||
.get(3)
|
||||
.and_then(|v| v.to_string().chars().next())
|
||||
.unwrap_or(','),
|
||||
skip_first: arr.get(4).map_or(false, |v| v.to_bool()),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
let response = reqwest::Client::builder()
|
||||
.timeout(TIMEOUT)
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap_or_default()
|
||||
.get(resource.as_ref())
|
||||
.send()
|
||||
.await
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, resource.to_string())
|
||||
.details("Failed to build request")
|
||||
})?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let bytes = response
|
||||
.bytes_with_limit(MAX_RESOURCE_SIZE)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, resource.to_string())
|
||||
.details("Failed to fetch resource")
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.ctx(trc::Key::Url, resource.to_string())
|
||||
.details("Resource is too large")
|
||||
})?;
|
||||
|
||||
let reader: Box<dyn std::io::Read> = if resource.ends_with(".gz") {
|
||||
Box::new(flate2::read::GzDecoder::new(&bytes[..]))
|
||||
} else {
|
||||
Box::new(&bytes[..])
|
||||
};
|
||||
|
||||
// Lock remote list for writing
|
||||
let mut _lock = ctx.server.inner.data.remote_lists.write();
|
||||
let list = _lock
|
||||
.entry(resource.to_string())
|
||||
.or_insert_with(|| RemoteList {
|
||||
entries: HashSet::new(),
|
||||
expires: Instant::now(),
|
||||
});
|
||||
|
||||
// Make sure that the list is still expired
|
||||
if list.expires > Instant::now() {
|
||||
return Ok(list.entries.contains(item.as_ref()).into());
|
||||
}
|
||||
|
||||
for (pos, line) in BufReader::new(reader).lines().enumerate() {
|
||||
let line_ = line.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, resource.to_string())
|
||||
.details("Failed to read line")
|
||||
})?;
|
||||
// Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
|
||||
if pos == 0 {
|
||||
list.entries.clear();
|
||||
}
|
||||
|
||||
match &format {
|
||||
Format::List => {
|
||||
let line = line_.trim();
|
||||
if !line.is_empty() {
|
||||
list.entries.insert(line.to_string());
|
||||
}
|
||||
}
|
||||
Format::Csv {
|
||||
column,
|
||||
separator,
|
||||
skip_first,
|
||||
} if pos > 0 || !*skip_first => {
|
||||
let mut in_quote = false;
|
||||
let mut col_num = 0;
|
||||
let mut entry = String::new();
|
||||
|
||||
for ch in line_.chars() {
|
||||
if ch != '"' {
|
||||
if ch == *separator && !in_quote {
|
||||
if col_num == *column {
|
||||
break;
|
||||
} else {
|
||||
col_num += 1;
|
||||
}
|
||||
} else if col_num == *column {
|
||||
entry.push(ch);
|
||||
if entry.len() > MAX_ENTRY_SIZE {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
in_quote = !in_quote;
|
||||
}
|
||||
}
|
||||
|
||||
if !entry.is_empty() {
|
||||
list.entries.insert(entry);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if list.entries.len() == MAX_ENTRIES {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
trc::event!(
|
||||
Spam(trc::SpamEvent::ListUpdated),
|
||||
Url = resource.as_ref().to_string(),
|
||||
Total = list.entries.len(),
|
||||
);
|
||||
|
||||
// Update expiration
|
||||
list.expires = Instant::now() + expires;
|
||||
Ok(list.entries.contains(item.as_ref()).into())
|
||||
} else {
|
||||
trc::bail!(trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.ctx(trc::Key::Code, response.status().as_u16())
|
||||
.ctx(trc::Key::Url, resource.to_string())
|
||||
.details("Failed to fetch remote list"));
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn exec_local_domain(ctx: PluginContext<'_>) -> trc::Result<Variable> {
|
||||
let domain = ctx.arguments[1].to_string();
|
||||
|
||||
|
|
|
@ -31,13 +31,12 @@ pub struct PluginContext<'x> {
|
|||
pub arguments: Vec<Variable>,
|
||||
}
|
||||
|
||||
const PLUGINS_REGISTER: [RegisterPluginFnc; 14] = [
|
||||
const PLUGINS_REGISTER: [RegisterPluginFnc; 13] = [
|
||||
query::register,
|
||||
exec::register,
|
||||
lookup::register,
|
||||
lookup::register_get,
|
||||
lookup::register_set,
|
||||
lookup::register_remote,
|
||||
lookup::register_local_domain,
|
||||
dns::register,
|
||||
dns::register_exists,
|
||||
|
@ -86,15 +85,14 @@ impl Core {
|
|||
2 => lookup::exec(ctx).await,
|
||||
3 => lookup::exec_get(ctx).await,
|
||||
4 => lookup::exec_set(ctx).await,
|
||||
5 => lookup::exec_remote(ctx).await,
|
||||
6 => lookup::exec_local_domain(ctx).await,
|
||||
7 => dns::exec(ctx).await,
|
||||
8 => dns::exec_exists(ctx).await,
|
||||
9 => http::exec_header(ctx).await,
|
||||
10 => headers::exec(ctx),
|
||||
11 => text::exec_tokenize(ctx),
|
||||
12 => text::exec_domain_part(ctx),
|
||||
13 => llm_prompt::exec(ctx).await,
|
||||
5 => lookup::exec_local_domain(ctx).await,
|
||||
6 => dns::exec(ctx).await,
|
||||
7 => dns::exec_exists(ctx).await,
|
||||
8 => http::exec_header(ctx).await,
|
||||
9 => headers::exec(ctx),
|
||||
10 => text::exec_tokenize(ctx),
|
||||
11 => text::exec_domain_part(ctx),
|
||||
12 => llm_prompt::exec(ctx).await,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
|
|
@ -727,6 +727,40 @@ impl<T> TokenType<T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: AsRef<str>> TokenType<T> {
|
||||
pub fn hostname(&self) -> Option<&str> {
|
||||
match self {
|
||||
TokenType::Url(url) => url.as_ref().split_once("://").map(|(_, host)| {
|
||||
host.split_once('/')
|
||||
.map_or(host, |(h, _)| h.split_once(':').map_or(h, |(h, _)| h))
|
||||
}),
|
||||
TokenType::UrlNoScheme(url) => {
|
||||
let url = url.as_ref();
|
||||
url.split_once('/').map_or(url, |(host, _)| host).into()
|
||||
}
|
||||
TokenType::Email(email) => email.as_ref().rsplit_once('@').map(|(_, domain)| domain),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hostname_sld(&self) -> Option<&str> {
|
||||
self.hostname().and_then(|host| psl::domain_str(host))
|
||||
}
|
||||
|
||||
pub fn url_lowercase(&self, with_scheme_only: bool) -> Option<String> {
|
||||
match self {
|
||||
TokenType::Url(url) => url.as_ref().trim().to_lowercase().into(),
|
||||
TokenType::UrlNoScheme(url) if !with_scheme_only => {
|
||||
let url = url.as_ref();
|
||||
format!("http:s//{}", url.trim().to_lowercase())
|
||||
.to_lowercase()
|
||||
.into()
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
|
|
|
@ -16,6 +16,11 @@ mail-builder = { version = "0.3", features = ["ludicrous_mode"] }
|
|||
mail-auth = { version = "0.5" }
|
||||
mail-send = { version = "0.4", default-features = false, features = ["cram-md5", "ring", "tls12"] }
|
||||
psl = "2"
|
||||
hyper = { version = "1.0.1", features = ["server", "http1", "http2"] }
|
||||
idna = "1.0"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "http2", "stream"]}
|
||||
decancer = "3.0.1"
|
||||
unicode-security = "0.1.0"
|
||||
|
||||
[features]
|
||||
test_mode = []
|
||||
|
|
107
crates/spam-filter/src/analysis/bounce.rs
Normal file
107
crates/spam-filter/src/analysis/bounce.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Server;
|
||||
use mail_parser::MimeHeaders;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
||||
pub trait SpamFilterAnalyzeBounce: Sync + Send {
|
||||
fn spam_filter_analyze_bounce(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeBounce for Server {
|
||||
async fn spam_filter_analyze_bounce(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut has_delivery_word = false;
|
||||
let mut has_undelivery_word = false;
|
||||
let mut has_failure_word = false;
|
||||
let mut has_report_word = false;
|
||||
let mut has_not_word = false;
|
||||
|
||||
for word in ctx.output.subject.split_whitespace() {
|
||||
match word {
|
||||
"delivery" | "delivered" => {
|
||||
has_delivery_word = true;
|
||||
}
|
||||
"undeliverable" | "undelivered" => {
|
||||
has_undelivery_word = true;
|
||||
}
|
||||
"returned" | "failed" | "failure" | "warning" => {
|
||||
has_failure_word = true;
|
||||
}
|
||||
|
||||
"notice" | "report" | "status" | "mail" => {
|
||||
has_report_word = true;
|
||||
}
|
||||
|
||||
"couldn't" | "hasn't" | "not" => {
|
||||
has_not_word = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Subject contains words or phrases typical for DSN
|
||||
let has_bounce_words = has_undelivery_word
|
||||
|| (has_delivery_word && (has_failure_word || has_not_word))
|
||||
|| (has_report_word && has_failure_word);
|
||||
|
||||
if has_bounce_words {
|
||||
ctx.result.add_tag("SUBJ_BOUNCE_WORDS");
|
||||
}
|
||||
|
||||
if !ctx.input.env_from.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
match ctx.input.message.content_type() {
|
||||
Some(ct)
|
||||
if ct.ctype().eq_ignore_ascii_case("multipart")
|
||||
&& ct
|
||||
.subtype()
|
||||
.map_or(false, |s| s.eq_ignore_ascii_case("report"))
|
||||
&& ct.attribute("report-type").map_or(false, |a| {
|
||||
a.eq_ignore_ascii_case("delivery-status")
|
||||
|| a.eq_ignore_ascii_case("disposition-notification")
|
||||
}) =>
|
||||
{
|
||||
// Message is a DSN
|
||||
ctx.result.add_tag("BOUNCE");
|
||||
}
|
||||
_ => {
|
||||
let from_local = &ctx.output.from.email.local_part;
|
||||
|
||||
if from_local.contains("mdaemon")
|
||||
&& ctx.input.message.header("X-MDDSN-Message").is_some()
|
||||
{
|
||||
// Message is a DSN
|
||||
ctx.result.add_tag("BOUNCE");
|
||||
} else if from_local.contains("postmaster") || from_local.contains("mailer-daemon")
|
||||
{
|
||||
if has_bounce_words {
|
||||
ctx.result.add_tag("BOUNCE");
|
||||
} else {
|
||||
for part in &ctx.input.message.parts {
|
||||
if let Some(ct) = part.content_type() {
|
||||
let st = ct.subtype().unwrap_or_default();
|
||||
let ct = ct.ctype();
|
||||
|
||||
if (ct.eq_ignore_ascii_case("message")
|
||||
|| ct.eq_ignore_ascii_case("text"))
|
||||
&& (st.eq_ignore_ascii_case("rfc822-headers")
|
||||
|| st.eq_ignore_ascii_case("rfc822"))
|
||||
{
|
||||
// Message is a DSN
|
||||
ctx.result.add_tag("BOUNCE");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use store::write::now;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeDate: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeDate for Core {
|
||||
impl SpamFilterAnalyzeDate for Server {
|
||||
async fn spam_filter_analyze_date(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
if let Some(date) = ctx.input.message.date() {
|
||||
let date = date.to_timestamp();
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_auth::{
|
||||
common::verify::VerifySignature, dmarc::Policy, DkimResult, DmarcResult, SpfResult,
|
||||
};
|
||||
|
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeDmarc: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeDmarc for Core {
|
||||
impl SpamFilterAnalyzeDmarc for Server {
|
||||
async fn spam_filter_analyze_dmarc(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
ctx.result
|
||||
.add_tag(match ctx.input.spf_mail_from_result.result() {
|
||||
|
@ -75,6 +75,7 @@ impl SpamFilterAnalyzeDmarc for Core {
|
|||
}
|
||||
|
||||
if self
|
||||
.core
|
||||
.spam
|
||||
.list_dmarc_allow
|
||||
.contains(&ctx.output.from.email.domain_part.fqdn)
|
||||
|
@ -85,6 +86,7 @@ impl SpamFilterAnalyzeDmarc for Core {
|
|||
ctx.result.add_tag("BLOCKLIST_DMARC");
|
||||
}
|
||||
} else if self
|
||||
.core
|
||||
.spam
|
||||
.list_spf_dkim_allow
|
||||
.contains(&ctx.output.from.email.domain_part.fqdn)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
||||
|
@ -11,7 +11,7 @@ pub trait SpamFilterAnalyzeEhlo: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeEhlo for Core {
|
||||
impl SpamFilterAnalyzeEhlo for Server {
|
||||
async fn spam_filter_analyze_ehlo(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
if let Some(ehlo_ip) = ctx.output.ehlo_host.ip {
|
||||
// Helo host is bare ip
|
||||
|
@ -34,8 +34,8 @@ impl SpamFilterAnalyzeEhlo for Core {
|
|||
|
||||
if matches!(
|
||||
(
|
||||
self.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
|
||||
self.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
|
||||
self.core.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
|
||||
self.core.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
|
||||
),
|
||||
(Ok(false), Ok(false))
|
||||
) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_parser::HeaderName;
|
||||
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
|
||||
|
||||
|
@ -26,7 +26,7 @@ const SERVICE_ACCOUNTS: [&str; 9] = [
|
|||
];
|
||||
pub(crate) const TITLES: [&str; 7] = ["mr. ", "mrs. ", "ms. ", "dr. ", "prof. ", "rev. ", "hon. "];
|
||||
|
||||
impl SpamFilterAnalyzeFrom for Core {
|
||||
impl SpamFilterAnalyzeFrom for Server {
|
||||
async fn spam_filter_analyze_from(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut from_count = 0;
|
||||
let mut from_raw = b"".as_slice();
|
||||
|
@ -96,12 +96,14 @@ impl SpamFilterAnalyzeFrom for Core {
|
|||
is_www_dot_domain = true;
|
||||
}
|
||||
if self
|
||||
.core
|
||||
.spam
|
||||
.list_freemail_providers
|
||||
.contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
|
||||
{
|
||||
ctx.result.add_tag("FREEMAIL_FROM");
|
||||
} else if self
|
||||
.core
|
||||
.spam
|
||||
.list_disposable_providers
|
||||
.contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
|
||||
|
@ -238,7 +240,7 @@ impl SpamFilterAnalyzeFrom for Core {
|
|||
if SERVICE_ACCOUNTS.contains(&ctx.output.env_from_addr.local_part.as_str()) {
|
||||
ctx.result.add_tag("ENVFROM_SERVICE_ACCT");
|
||||
}
|
||||
if self.spam.list_freemail_providers.contains(
|
||||
if self.core.spam.list_freemail_providers.contains(
|
||||
ctx.output
|
||||
.env_from_addr
|
||||
.domain_part
|
||||
|
@ -247,7 +249,7 @@ impl SpamFilterAnalyzeFrom for Core {
|
|||
.unwrap_or_default(),
|
||||
) {
|
||||
ctx.result.add_tag("FREEMAIL_ENVFROM");
|
||||
} else if self.spam.list_disposable_providers.contains(
|
||||
} else if self.core.spam.list_disposable_providers.contains(
|
||||
ctx.output
|
||||
.env_from_addr
|
||||
.domain_part
|
||||
|
@ -261,9 +263,11 @@ impl SpamFilterAnalyzeFrom for Core {
|
|||
// Mail from no resolve to A or MX
|
||||
if matches!(
|
||||
(
|
||||
self.dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
|
||||
self.core
|
||||
.dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
|
||||
.await,
|
||||
self.dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
|
||||
self.core
|
||||
.dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
|
||||
.await
|
||||
),
|
||||
(Ok(false), Ok(false))
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_parser::HeaderName;
|
||||
use store::ahash::AHashSet;
|
||||
|
||||
|
@ -13,7 +13,7 @@ pub trait SpamFilterAnalyzeHeaders: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeHeaders for Core {
|
||||
impl SpamFilterAnalyzeHeaders for Server {
|
||||
async fn spam_filter_analyze_headers(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut list_score = 0.0;
|
||||
let mut unique_headers = AHashSet::new();
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
use common::Core;
|
||||
use mail_parser::{parsers::fields::thread::thread_name, HeaderName};
|
||||
use std::collections::HashSet;
|
||||
|
||||
use common::Server;
|
||||
use mail_parser::{parsers::fields::thread::thread_name, HeaderName, PartType};
|
||||
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
|
||||
|
||||
use crate::{
|
||||
modules::html::{html_to_tokens, HtmlToken, HREF, SRC},
|
||||
Email, Hostname, Recipient, SpamFilterContext, SpamFilterInput, SpamFilterOutput,
|
||||
SpamFilterResult,
|
||||
SpamFilterResult, TextPart,
|
||||
};
|
||||
|
||||
pub trait SpamFilterInit {
|
||||
|
@ -12,9 +16,9 @@ pub trait SpamFilterInit {
|
|||
|
||||
const POSTMASTER_ADDRESSES: [&str; 3] = ["postmaster", "mailer-daemon", "root"];
|
||||
|
||||
impl SpamFilterInit for Core {
|
||||
impl SpamFilterInit for Server {
|
||||
fn spam_filter_init<'x>(&self, input: SpamFilterInput<'x>) -> SpamFilterContext<'x> {
|
||||
let mut subject = String::new();
|
||||
let mut subject = "";
|
||||
let mut from = None;
|
||||
let mut reply_to = None;
|
||||
let mut recipients_to = Vec::new();
|
||||
|
@ -67,7 +71,7 @@ impl SpamFilterInit for Core {
|
|||
});
|
||||
}
|
||||
HeaderName::Subject => {
|
||||
subject = header.value().as_text().unwrap_or_default().to_lowercase();
|
||||
subject = header.value().as_text().unwrap_or_default();
|
||||
}
|
||||
HeaderName::From => {
|
||||
from = header.value().as_address().and_then(|addrs| addrs.first());
|
||||
|
@ -76,6 +80,143 @@ impl SpamFilterInit for Core {
|
|||
}
|
||||
}
|
||||
|
||||
// Tokenize subject
|
||||
let subject_tokens = TypesTokenizer::new(subject)
|
||||
.tokenize_numbers(false)
|
||||
.tokenize_urls(true)
|
||||
.tokenize_urls_without_scheme(true)
|
||||
.tokenize_emails(true)
|
||||
.map(|t| t.word)
|
||||
.collect::<Vec<_>>();
|
||||
let subject = subject.to_lowercase();
|
||||
|
||||
// Tokenize and convert text parts
|
||||
let mut text_parts = Vec::new();
|
||||
let mut text_parts_nested = Vec::new();
|
||||
let mut message_stack = Vec::new();
|
||||
let mut message_iter = input.message.parts.iter();
|
||||
|
||||
loop {
|
||||
while let Some(part) = message_iter.next() {
|
||||
let is_main_message = message_stack.is_empty();
|
||||
let text_part = match &part.body {
|
||||
PartType::Text(text) => TextPart::Plain {
|
||||
text_body: text.as_ref(),
|
||||
tokens: TypesTokenizer::new(text.as_ref())
|
||||
.tokenize_numbers(false)
|
||||
.tokenize_urls(true)
|
||||
.tokenize_urls_without_scheme(true)
|
||||
.tokenize_emails(true)
|
||||
.map(|t| t.word)
|
||||
.collect::<Vec<_>>(),
|
||||
},
|
||||
PartType::Html(html) => {
|
||||
let html_tokens = html_to_tokens(html);
|
||||
let text_body_len = html_tokens
|
||||
.iter()
|
||||
.filter_map(|t| match t {
|
||||
HtmlToken::Text { text } => text.len().into(),
|
||||
_ => None,
|
||||
})
|
||||
.sum();
|
||||
let mut text_body = String::with_capacity(text_body_len);
|
||||
for token in &html_tokens {
|
||||
if let HtmlToken::Text { text } = token {
|
||||
if !text_body.is_empty()
|
||||
&& !text_body.ends_with(' ')
|
||||
&& text.starts_with(' ')
|
||||
{
|
||||
text_body.push(' ');
|
||||
}
|
||||
text_body.push_str(text)
|
||||
}
|
||||
}
|
||||
|
||||
TextPart::Html {
|
||||
tokens: TypesTokenizer::new(&text_body)
|
||||
.tokenize_numbers(false)
|
||||
.tokenize_urls(true)
|
||||
.tokenize_urls_without_scheme(true)
|
||||
.tokenize_emails(true)
|
||||
.map(|t| match t.word {
|
||||
TokenType::Alphabetic(s) => {
|
||||
TokenType::Alphabetic(s.to_string())
|
||||
}
|
||||
TokenType::Alphanumeric(s) => {
|
||||
TokenType::Alphanumeric(s.to_string())
|
||||
}
|
||||
TokenType::Integer(s) => TokenType::Integer(s.to_string()),
|
||||
TokenType::Other(s) => TokenType::Other(s),
|
||||
TokenType::Punctuation(s) => TokenType::Punctuation(s),
|
||||
TokenType::Space => TokenType::Space,
|
||||
TokenType::Url(s) => TokenType::Url(s.to_string()),
|
||||
TokenType::UrlNoScheme(s) => {
|
||||
TokenType::UrlNoScheme(s.to_string())
|
||||
}
|
||||
TokenType::UrlNoHost(s) => TokenType::UrlNoHost(s.to_string()),
|
||||
TokenType::IpAddr(s) => TokenType::IpAddr(s.to_string()),
|
||||
TokenType::Email(s) => TokenType::Email(s.to_string()),
|
||||
TokenType::Float(s) => TokenType::Float(s.to_string()),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
html_tokens,
|
||||
text_body,
|
||||
}
|
||||
}
|
||||
PartType::Message(message) => {
|
||||
message_stack.push(message_iter);
|
||||
message_iter = message.parts.iter();
|
||||
TextPart::None
|
||||
}
|
||||
_ => TextPart::None,
|
||||
};
|
||||
|
||||
if is_main_message {
|
||||
text_parts.push(text_part);
|
||||
} else if !matches!(text_part, TextPart::None) {
|
||||
text_parts_nested.push(text_part);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(iter) = message_stack.pop() {
|
||||
message_iter = iter;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
text_parts.extend(text_parts_nested);
|
||||
|
||||
// Extract URLs
|
||||
let mut urls: HashSet<String> =
|
||||
HashSet::from_iter(subject_tokens.iter().filter_map(|t| t.url_lowercase(false)));
|
||||
for part in &text_parts {
|
||||
match part {
|
||||
TextPart::Plain { tokens, .. } => {
|
||||
urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
|
||||
}
|
||||
TextPart::Html {
|
||||
html_tokens,
|
||||
tokens,
|
||||
..
|
||||
} => {
|
||||
for token in html_tokens {
|
||||
if let HtmlToken::StartTag { attributes, .. } = token {
|
||||
for (attr, value) in attributes {
|
||||
match value {
|
||||
Some(value) if [HREF, SRC].contains(attr) => {
|
||||
urls.insert(value.trim().to_lowercase());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
|
||||
}
|
||||
TextPart::None => {}
|
||||
}
|
||||
}
|
||||
|
||||
let env_from_addr = Email::new(input.env_from);
|
||||
SpamFilterContext {
|
||||
output: SpamFilterOutput {
|
||||
|
@ -101,9 +242,12 @@ impl SpamFilterInit for Core {
|
|||
reply_to,
|
||||
subject_thread: thread_name(&subject).to_string(),
|
||||
subject,
|
||||
subject_tokens,
|
||||
recipients_to,
|
||||
recipients_cc,
|
||||
recipients_bcc,
|
||||
text_parts,
|
||||
urls,
|
||||
},
|
||||
input,
|
||||
result: SpamFilterResult {
|
||||
|
@ -117,7 +261,7 @@ impl SpamFilterInit for Core {
|
|||
|
||||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
||||
|
@ -128,7 +272,7 @@ pub trait SpamFilterAnalyze!: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyze! for Core {
|
||||
impl SpamFilterAnalyze! for Server {
|
||||
async fn spam_filter_analyze_*(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
todo!()
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_auth::IprevResult;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeIpRev: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeIpRev for Core {
|
||||
impl SpamFilterAnalyzeIpRev for Server {
|
||||
async fn spam_filter_analyze_iprev(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
match &ctx.input.iprev_result.result {
|
||||
IprevResult::TempError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_parser::HeaderName;
|
||||
|
||||
use crate::{Hostname, SpamFilterContext};
|
||||
|
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeMid: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeMid for Core {
|
||||
impl SpamFilterAnalyzeMid for Server {
|
||||
async fn spam_filter_analyze_message_id(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mid_raw = ctx
|
||||
.input
|
||||
|
|
|
@ -4,6 +4,7 @@ use mail_parser::{parsers::MessageStream, Header};
|
|||
|
||||
use crate::{Recipient, SpamFilterInput, SpamFilterOutput, SpamFilterResult};
|
||||
|
||||
pub mod bounce;
|
||||
pub mod date;
|
||||
pub mod dmarc;
|
||||
pub mod ehlo;
|
||||
|
@ -12,8 +13,11 @@ pub mod headers;
|
|||
pub mod init;
|
||||
pub mod iprev;
|
||||
pub mod messageid;
|
||||
pub mod received;
|
||||
pub mod recipient;
|
||||
pub mod replyto;
|
||||
pub mod subject;
|
||||
pub mod url;
|
||||
|
||||
impl SpamFilterInput<'_> {
|
||||
pub fn header_as_address(&self, header: &Header<'_>) -> Option<Cow<'_, str>> {
|
||||
|
@ -27,7 +31,7 @@ impl SpamFilterInput<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
impl SpamFilterOutput {
|
||||
impl SpamFilterOutput<'_> {
|
||||
pub fn all_recipients(&self) -> impl Iterator<Item = &Recipient> {
|
||||
self.recipients_to
|
||||
.iter()
|
||||
|
|
146
crates/spam-filter/src/analysis/received.rs
Normal file
146
crates/spam-filter/src/analysis/received.rs
Normal file
|
@ -0,0 +1,146 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Server;
|
||||
use mail_parser::{HeaderName, Host};
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
||||
pub trait SpamFilterAnalyzeReceived: Sync + Send {
|
||||
fn spam_filter_analyze_received(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeReceived for Server {
|
||||
async fn spam_filter_analyze_received(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut rcvd_count = 0;
|
||||
let mut rcvd_from_ip = 0;
|
||||
let mut tls_count = 0;
|
||||
let mut has_ua = false;
|
||||
|
||||
for header in ctx.input.message.headers() {
|
||||
match &header.name {
|
||||
HeaderName::Received => {
|
||||
if !ctx
|
||||
.input
|
||||
.message
|
||||
.raw_message()
|
||||
.get(header.offset_start..header.offset_end)
|
||||
.unwrap_or_default()
|
||||
.is_ascii()
|
||||
{
|
||||
// Received headers have non-ASCII characters
|
||||
ctx.result.add_tag("RCVD_ILLEGAL_CHARS");
|
||||
}
|
||||
|
||||
if let Some(received) = header.value().as_received() {
|
||||
let helo_domain = received.helo();
|
||||
let ip_rev = received.from_iprev();
|
||||
|
||||
if matches!(&helo_domain, Some(Host::Name(hostname)) if hostname.eq_ignore_ascii_case("localhost"))
|
||||
{
|
||||
// HELO domain is "user"
|
||||
ctx.result.add_tag("RCVD_HELO_USER");
|
||||
} else if let (Some(Host::Name(helo_domain)), Some(ip_rev)) =
|
||||
(helo_domain, ip_rev)
|
||||
{
|
||||
if helo_domain.to_lowercase() != ip_rev.to_lowercase() {
|
||||
// HELO domain does not match PTR record
|
||||
ctx.result.add_tag("FORGED_RCVD_TRAIL");
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(delivered_for) = received.for_().map(|s| s.to_lowercase()) {
|
||||
if ctx
|
||||
.output
|
||||
.all_recipients()
|
||||
.any(|r| r.email.address == delivered_for)
|
||||
{
|
||||
// Recipient appears on Received trail
|
||||
ctx.result.add_tag("PREVIOUSLY_DELIVERED");
|
||||
}
|
||||
}
|
||||
|
||||
if received.from_ip().is_some() {
|
||||
// Received from an IP address rather than a FQDN
|
||||
rcvd_from_ip += 1;
|
||||
}
|
||||
|
||||
if received.tls_version().is_some() {
|
||||
// Received with TLS
|
||||
tls_count += 1;
|
||||
}
|
||||
} else {
|
||||
// Received header is not RFC 5322 compliant
|
||||
ctx.result.add_tag("RCVD_UNPARSABLE");
|
||||
}
|
||||
|
||||
rcvd_count += 1;
|
||||
}
|
||||
HeaderName::Other(name) => {
|
||||
if !has_ua
|
||||
&& (name.eq_ignore_ascii_case("User-Agent")
|
||||
|| name.eq_ignore_ascii_case("X-Mailer"))
|
||||
{
|
||||
has_ua = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && ctx.output.ehlo_host.ip.is_some()) {
|
||||
// Has two or more Received headers containing bare IP addresses
|
||||
ctx.result.add_tag("RCVD_DOUBLE_IP_SPAM");
|
||||
}
|
||||
|
||||
// Received from an authenticated user
|
||||
if !ctx.input.authenticated_as.is_empty() {
|
||||
ctx.result.add_tag("RCVD_VIA_SMTP_AUTH");
|
||||
}
|
||||
|
||||
// Received with TLS checks
|
||||
if rcvd_count > 0 && rcvd_count == tls_count && !ctx.input.tls_version.is_empty() {
|
||||
ctx.result.add_tag("RCVD_TLS_ALL");
|
||||
} else if !ctx.input.tls_version.is_empty() {
|
||||
ctx.result.add_tag("RCVD_TLS_LAST");
|
||||
} else {
|
||||
ctx.result.add_tag("RCVD_NO_TLS_LAST");
|
||||
}
|
||||
|
||||
match rcvd_count {
|
||||
0 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_ZERO");
|
||||
|
||||
// One received header in a message (currently zero
|
||||
// but one header will be added later by the MTA)
|
||||
ctx.result.add_tag("ONCE_RECEIVED");
|
||||
|
||||
// Message has been directly delivered from MUA to local MX
|
||||
if has_ua {
|
||||
ctx.result.add_tag("DIRECT_TO_MX");
|
||||
}
|
||||
}
|
||||
1 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_ONE");
|
||||
}
|
||||
2 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_TWO");
|
||||
}
|
||||
3 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_THREE");
|
||||
}
|
||||
4 | 5 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_FIVE");
|
||||
}
|
||||
6 | 7 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_SEVEN");
|
||||
}
|
||||
8..=12 => {
|
||||
ctx.result.add_tag("RCVD_COUNT_TWELVE");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::{scripts::functions::text::levenshtein_distance, Core};
|
||||
use common::{scripts::functions::text::levenshtein_distance, Server};
|
||||
use mail_parser::HeaderName;
|
||||
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
|
||||
use store::ahash::HashSet;
|
||||
|
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeRecipient: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeRecipient for Core {
|
||||
impl SpamFilterAnalyzeRecipient for Server {
|
||||
async fn spam_filter_analyze_recipient(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut to_raw = b"".as_slice();
|
||||
let mut cc_raw = b"".as_slice();
|
||||
|
@ -191,7 +191,7 @@ impl SpamFilterAnalyzeRecipient for Core {
|
|||
|
||||
// Check for freemail or disposable domains
|
||||
if let Some(domain) = rcpt.email.domain_part.sld.as_deref() {
|
||||
if self.spam.list_freemail_providers.contains(domain) {
|
||||
if self.core.spam.list_freemail_providers.contains(domain) {
|
||||
if ctx
|
||||
.output
|
||||
.recipients_to
|
||||
|
@ -202,7 +202,7 @@ impl SpamFilterAnalyzeRecipient for Core {
|
|||
} else {
|
||||
ctx.result.add_tag("FREEMAIL_CC");
|
||||
}
|
||||
} else if self.spam.list_disposable_providers.contains(domain) {
|
||||
} else if self.core.spam.list_disposable_providers.contains(domain) {
|
||||
if ctx
|
||||
.output
|
||||
.recipients_to
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Core;
|
||||
use common::Server;
|
||||
use mail_parser::HeaderName;
|
||||
|
||||
use crate::SpamFilterContext;
|
||||
|
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeReplyTo: Sync + Send {
|
|||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeReplyTo for Core {
|
||||
impl SpamFilterAnalyzeReplyTo for Server {
|
||||
async fn spam_filter_analyze_reply_to(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut reply_to_raw = b"".as_slice();
|
||||
let mut is_from_list = false;
|
||||
|
@ -104,7 +104,12 @@ impl SpamFilterAnalyzeReplyTo for Core {
|
|||
.sld
|
||||
.as_deref()
|
||||
.unwrap_or_default();
|
||||
if self.spam.list_freemail_providers.contains(reply_to_sld) {
|
||||
if self
|
||||
.core
|
||||
.spam
|
||||
.list_freemail_providers
|
||||
.contains(reply_to_sld)
|
||||
{
|
||||
ctx.result.add_tag("FREEMAIL_REPLYTO");
|
||||
let from_domain_sld = ctx
|
||||
.output
|
||||
|
@ -115,11 +120,20 @@ impl SpamFilterAnalyzeReplyTo for Core {
|
|||
.as_deref()
|
||||
.unwrap_or_default();
|
||||
if reply_to_sld != from_domain_sld
|
||||
&& self.spam.list_freemail_providers.contains(from_domain_sld)
|
||||
&& self
|
||||
.core
|
||||
.spam
|
||||
.list_freemail_providers
|
||||
.contains(from_domain_sld)
|
||||
{
|
||||
ctx.result.add_tag("FREEMAIL_REPLYTO_NEQ_FROM_DOM");
|
||||
}
|
||||
} else if self.spam.list_disposable_providers.contains(reply_to_sld) {
|
||||
} else if self
|
||||
.core
|
||||
.spam
|
||||
.list_disposable_providers
|
||||
.contains(reply_to_sld)
|
||||
{
|
||||
ctx.result.add_tag("DISPOSABLE_REPLYTO");
|
||||
}
|
||||
|
||||
|
|
190
crates/spam-filter/src/analysis/subject.rs
Normal file
190
crates/spam-filter/src/analysis/subject.rs
Normal file
|
@ -0,0 +1,190 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Server;
|
||||
use mail_parser::HeaderName;
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
|
||||
|
||||
use crate::{Email, SpamFilterContext};
|
||||
|
||||
pub trait SpamFilterAnalyzeSubject: Sync + Send {
|
||||
fn spam_filter_analyze_subject(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeSubject for Server {
|
||||
async fn spam_filter_analyze_subject(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut subject_raw = b"".as_slice();
|
||||
let mut is_reply = false;
|
||||
|
||||
for header in ctx.input.message.headers() {
|
||||
match &header.name {
|
||||
HeaderName::Subject => {
|
||||
subject_raw = ctx
|
||||
.input
|
||||
.message
|
||||
.raw_message()
|
||||
.get(header.offset_start..header.offset_end)
|
||||
.unwrap_or_default();
|
||||
}
|
||||
HeaderName::InReplyTo | HeaderName::References => {
|
||||
is_reply = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if subject_raw.is_empty() {
|
||||
// Missing subject header
|
||||
ctx.result.add_tag("MISSING_SUBJECT");
|
||||
return;
|
||||
}
|
||||
|
||||
let mut word_count = 0;
|
||||
let mut upper_count = 0;
|
||||
let mut lower_count = 0;
|
||||
|
||||
let mut last_ch = ' ';
|
||||
let mut last_ch_trimmed = ' ';
|
||||
let mut is_ascii = true;
|
||||
|
||||
for ch in ctx.output.subject_thread.chars() {
|
||||
if !ch.is_whitespace() {
|
||||
if last_ch.is_whitespace() {
|
||||
word_count += 1;
|
||||
}
|
||||
|
||||
match ch {
|
||||
'$' | '€' | '£' | '¥' | '₹' | '₽' | '₿' => {
|
||||
ctx.result.add_tag("SUBJECT_HAS_CURRENCY");
|
||||
}
|
||||
'!' => {
|
||||
ctx.result.add_tag("SUBJECT_HAS_EXCLAIM");
|
||||
}
|
||||
'?' => {
|
||||
ctx.result.add_tag("SUBJECT_HAS_QUESTION");
|
||||
}
|
||||
_ => {
|
||||
if ch.is_alphabetic() {
|
||||
if ch.is_uppercase() {
|
||||
upper_count += 1;
|
||||
} else {
|
||||
lower_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_ch_trimmed = ch;
|
||||
}
|
||||
|
||||
if !ch.is_ascii() {
|
||||
is_ascii = false;
|
||||
}
|
||||
|
||||
last_ch = ch;
|
||||
}
|
||||
|
||||
match last_ch_trimmed {
|
||||
'?' => {
|
||||
ctx.result.add_tag("SUBJECT_ENDS_QUESTION");
|
||||
}
|
||||
'!' => {
|
||||
ctx.result.add_tag("SUBJECT_ENDS_EXCLAIM");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if last_ch.is_whitespace() {
|
||||
if last_ch_trimmed.is_whitespace() {
|
||||
// Subject is empty
|
||||
ctx.result.add_tag("EMPTY_SUBJECT");
|
||||
} else {
|
||||
// Subject ends with whitespace
|
||||
ctx.result.add_tag("SUBJECT_ENDS_SPACES");
|
||||
}
|
||||
}
|
||||
|
||||
if ctx.output.subject_thread.len() >= 10
|
||||
&& word_count > 1
|
||||
&& upper_count > 2
|
||||
&& lower_count == 0
|
||||
{
|
||||
// Subject contains mostly capital letters
|
||||
ctx.result.add_tag("SUBJ_ALL_CAPS");
|
||||
}
|
||||
|
||||
if ctx.output.subject_thread.len() > 200 {
|
||||
// Subject is very long
|
||||
ctx.result.add_tag("LONG_SUBJ");
|
||||
}
|
||||
|
||||
for token in &ctx.output.subject_tokens {
|
||||
match token {
|
||||
TokenType::Url(_) => {
|
||||
// Subject contains URL
|
||||
ctx.result.add_tag("URL_IN_SUBJECT");
|
||||
}
|
||||
TokenType::Email(address) => {
|
||||
// Subject contains recipient
|
||||
let email = Email::new(address);
|
||||
if ctx.output.env_to_addr.contains(&email)
|
||||
|| ctx
|
||||
.output
|
||||
.all_recipients()
|
||||
.any(|r| r.email.address == email.address)
|
||||
{
|
||||
ctx.result.add_tag("RCPT_IN_SUBJECT");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if let Some(hostname) = token.hostname_sld() {
|
||||
let hostname = Some(hostname.to_lowercase());
|
||||
if ctx
|
||||
.output
|
||||
.all_recipients()
|
||||
.any(|r| r.email.domain_part.sld == hostname)
|
||||
{
|
||||
ctx.result.add_tag("RCPT_DOMAIN_IN_SUBJECT");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate encoding
|
||||
let subject_raw_utf8 = std::str::from_utf8(subject_raw);
|
||||
if !subject_raw.is_ascii() {
|
||||
if (ctx.input.env_from_flags
|
||||
& (MAIL_SMTPUTF8 | MAIL_BODY_8BITMIME | MAIL_BODY_BINARYMIME))
|
||||
== 0
|
||||
{
|
||||
ctx.result.add_tag("SUBJECT_NEEDS_ENCODING");
|
||||
}
|
||||
|
||||
if subject_raw_utf8.is_err() {
|
||||
ctx.result.add_tag("INVALID_SUBJECT_8BIT");
|
||||
}
|
||||
}
|
||||
|
||||
// Validate unnecessary encoding
|
||||
let subject_raw_utf8 = subject_raw_utf8.unwrap_or_default();
|
||||
if is_ascii && subject_raw_utf8.contains("=?") && subject_raw_utf8.contains("?=") {
|
||||
if subject_raw_utf8.contains("?q?") || subject_raw_utf8.contains("?Q?") {
|
||||
// Subject header is unnecessarily encoded in quoted-printable
|
||||
ctx.result.add_tag("SUBJ_EXCESS_QP");
|
||||
} else if subject_raw_utf8.contains("?b?") || subject_raw_utf8.contains("?B?") {
|
||||
// Subject header is unnecessarily encoded in base64
|
||||
ctx.result.add_tag("SUBJ_EXCESS_BASE64");
|
||||
}
|
||||
}
|
||||
|
||||
if !is_reply && ctx.output.subject.trim().starts_with("re:") {
|
||||
// Subject is not a reply but starts with "re:"
|
||||
ctx.result.add_tag("FAKE_REPLY");
|
||||
}
|
||||
}
|
||||
}
|
324
crates/spam-filter/src/analysis/url.rs
Normal file
324
crates/spam-filter/src/analysis/url.rs
Normal file
|
@ -0,0 +1,324 @@
|
|||
use std::{borrow::Cow, future::Future, time::Duration};
|
||||
|
||||
use common::Server;
|
||||
use common::{config::spamfilter::Target, scripts::functions::unicode::CharUtils};
|
||||
use hyper::{
|
||||
header::{HeaderName, LOCATION},
|
||||
Uri,
|
||||
};
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use reqwest::redirect::Policy;
|
||||
use unicode_security::MixedScript;
|
||||
|
||||
use crate::modules::dnsbl::is_dnsbl;
|
||||
use crate::modules::remote_list::is_in_remote_list;
|
||||
use crate::{
|
||||
modules::html::{HtmlToken, A, HREF},
|
||||
Hostname, SpamFilterContext, TextPart,
|
||||
};
|
||||
|
||||
pub trait SpamFilterAnalyzeUrl: Sync + Send {
|
||||
fn spam_filter_analyze_url(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeUrl for Server {
|
||||
async fn spam_filter_analyze_url(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
for (part_id, part) in ctx.output.text_parts.iter().enumerate() {
|
||||
if ctx.input.message.text_body.contains(&part_id)
|
||||
|| ctx.input.message.html_body.contains(&part_id)
|
||||
{
|
||||
let is_single = match part {
|
||||
TextPart::Plain { tokens, .. } => is_single_url(tokens),
|
||||
TextPart::Html {
|
||||
html_tokens,
|
||||
tokens,
|
||||
..
|
||||
} => is_single_html_url(html_tokens, tokens),
|
||||
TextPart::None => false,
|
||||
};
|
||||
|
||||
if is_single {
|
||||
ctx.result.add_tag("URL_ONLY");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for url in &ctx.output.urls {
|
||||
for ch in url.chars() {
|
||||
if ch.is_zwsp() {
|
||||
ctx.result.add_tag("ZERO_WIDTH_SPACE_URL");
|
||||
}
|
||||
|
||||
if ch.is_obscured() {
|
||||
ctx.result.add_tag("R_SUSPICIOUS_URL");
|
||||
}
|
||||
}
|
||||
|
||||
// Skip non-URLs such as 'data:' and 'mailto:'
|
||||
if !url.contains("://") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse url
|
||||
let url_parsed = match url.parse::<Uri>() {
|
||||
Ok(url) if url.host().is_some() => url,
|
||||
_ => {
|
||||
// URL could not be parsed
|
||||
ctx.result.add_tag("R_SUSPICIOUS_URL");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let host = Hostname::new(url_parsed.host().unwrap());
|
||||
let host_sld = host.sld_or_default();
|
||||
|
||||
// Skip local and trusted domains
|
||||
if self.core.spam.list_trusted_domains.contains(host_sld)
|
||||
|| self
|
||||
.core
|
||||
.storage
|
||||
.directory
|
||||
.is_local_domain(host_sld)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for redirectors
|
||||
let mut redirected_urls = Vec::new();
|
||||
if host.ip.is_none() && self.core.spam.list_url_redirectors.contains(host_sld) {
|
||||
ctx.result.add_tag("REDIRECTOR_URL");
|
||||
|
||||
let mut redirect_count = 0;
|
||||
let mut url_redirect = Cow::Borrowed(url);
|
||||
|
||||
while redirect_count <= 0 {
|
||||
match http_get_header(url_redirect.as_ref(), LOCATION, Duration::from_secs(5))
|
||||
.await
|
||||
{
|
||||
Ok(Some(location)) => {
|
||||
if let Ok(location_parsed) = location.parse::<Uri>() {
|
||||
let host =
|
||||
Hostname::new(location_parsed.host().unwrap_or_default());
|
||||
if self
|
||||
.core
|
||||
.spam
|
||||
.list_url_redirectors
|
||||
.contains(host.sld_or_default())
|
||||
{
|
||||
url_redirect = Cow::Owned(location);
|
||||
redirect_count += 1;
|
||||
continue;
|
||||
} else {
|
||||
let location = location.to_lowercase();
|
||||
if !ctx.output.urls.contains(&location) {
|
||||
redirected_urls.push((
|
||||
Cow::Owned(location),
|
||||
location_parsed,
|
||||
host,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(err) => {
|
||||
trc::error!(err.span_id(ctx.input.span_id));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if redirect_count > 5 {
|
||||
ctx.result.add_tag("URL_REDIRECTOR_NESTED");
|
||||
}
|
||||
}
|
||||
|
||||
for (url, url_parsed, host) in [(Cow::Borrowed(url), url_parsed, host)]
|
||||
.into_iter()
|
||||
.chain(redirected_urls.into_iter())
|
||||
{
|
||||
let query = url_parsed
|
||||
.path_and_query()
|
||||
.map(|pq| pq.as_str())
|
||||
.unwrap_or_default();
|
||||
if host.ip.is_none() {
|
||||
if !host.fqdn.is_ascii() {
|
||||
if let Ok(cured_host) =
|
||||
decancer::cure(&host.fqdn, decancer::Options::default())
|
||||
{
|
||||
let cured_host = cured_host.to_string();
|
||||
if cured_host != host.fqdn
|
||||
&& matches!(self.core.dns_exists_ip(&cured_host).await, Ok(true))
|
||||
{
|
||||
ctx.result.add_tag("HOMOGRAPH_URL");
|
||||
}
|
||||
|
||||
if !cured_host.is_single_script() {
|
||||
ctx.result.add_tag("MIXED_CHARSET_URL");
|
||||
}
|
||||
}
|
||||
} else if matches!(host.sld.as_deref(), Some("googleusercontent.com"))
|
||||
&& query.starts_with("/proxy/")
|
||||
{
|
||||
ctx.result.add_tag("HAS_GUC_PROXY_URI");
|
||||
} else if host.fqdn.ends_with("firebasestorage.googleapis.com") {
|
||||
ctx.result.add_tag("HAS_GOOGLE_FIREBASE_URL");
|
||||
} else if host.sld_or_default().starts_with("google.") && query.contains("url?")
|
||||
{
|
||||
ctx.result.add_tag("HAS_GOOGLE_REDIR");
|
||||
}
|
||||
|
||||
if host.fqdn.contains("ipfs.")
|
||||
|| (query.contains("/ipfs") && query.contains("/qm"))
|
||||
{
|
||||
// InterPlanetary File System (IPFS) gateway URL, likely malicious
|
||||
ctx.result.add_tag("HAS_IPFS_GATEWAY_URL");
|
||||
} else if host.fqdn.ends_with(".onion") {
|
||||
// Onion URL
|
||||
ctx.result.add_tag("HAS_ONION_URI");
|
||||
}
|
||||
} else {
|
||||
// URL is an ip address
|
||||
ctx.result.add_tag("R_SUSPICIOUS_URL");
|
||||
}
|
||||
|
||||
if query.starts_with("/wp-") {
|
||||
// Contains WordPress URIs
|
||||
ctx.result.add_tag("HAS_WP_URI");
|
||||
|
||||
if query.starts_with("/wp-content") || query.starts_with("/wp-includes") {
|
||||
// URL that is pointing to a compromised WordPress installation
|
||||
ctx.result.add_tag("WP_COMPROMISED");
|
||||
}
|
||||
}
|
||||
|
||||
if query.contains("/../")
|
||||
&& !query.contains("/.well-known")
|
||||
&& !query.contains("/.well_known")
|
||||
{
|
||||
// Message contains URI with a hidden path
|
||||
ctx.result.add_tag("URI_HIDDEN_PATH");
|
||||
}
|
||||
|
||||
// Check remote lists
|
||||
for remote in &self.core.spam.remote_lists {
|
||||
if matches!(remote.target, Target::Url)
|
||||
&& is_in_remote_list(self, remote, url.as_ref(), ctx.input.span_id).await
|
||||
{
|
||||
ctx.result.add_tag(&remote.tag);
|
||||
}
|
||||
}
|
||||
|
||||
// Check DNSBL
|
||||
for dnsbl in &self.core.spam.dnsbls {
|
||||
if matches!(dnsbl.target, Target::Url) {
|
||||
if let Some(tag) =
|
||||
is_dnsbl(self, dnsbl, url.as_ref(), ctx.input.span_id).await
|
||||
{
|
||||
ctx.result.add_tag(tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn http_get_header(
|
||||
url: &str,
|
||||
header: HeaderName,
|
||||
timeout: Duration,
|
||||
) -> trc::Result<Option<String>> {
|
||||
reqwest::Client::builder()
|
||||
.user_agent("Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0")
|
||||
.timeout(timeout)
|
||||
.redirect(Policy::none())
|
||||
.danger_accept_invalid_certs(true)
|
||||
.build()
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.details("Failed to build request")
|
||||
})?
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.details("Failed to send request")
|
||||
})
|
||||
.map(|response| {
|
||||
response
|
||||
.headers()
|
||||
.get(header)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|h| h.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn is_single_url<T: AsRef<str>>(tokens: &[TokenType<T>]) -> bool {
|
||||
let mut url_count = 0;
|
||||
let mut word_count = 0;
|
||||
|
||||
for token in tokens {
|
||||
match token {
|
||||
TokenType::Alphabetic(_)
|
||||
| TokenType::Alphanumeric(_)
|
||||
| TokenType::Integer(_)
|
||||
| TokenType::Email(_)
|
||||
| TokenType::Float(_) => {
|
||||
word_count += 1;
|
||||
}
|
||||
TokenType::Url(_) | TokenType::UrlNoScheme(_) => {
|
||||
url_count += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
url_count == 1 && word_count <= 1
|
||||
}
|
||||
|
||||
fn is_single_html_url<T: AsRef<str>>(html_tokens: &[HtmlToken], tokens: &[TokenType<T>]) -> bool {
|
||||
let mut url_count = 0;
|
||||
let mut word_count = 0;
|
||||
|
||||
for token in tokens {
|
||||
match token {
|
||||
TokenType::Alphabetic(_)
|
||||
| TokenType::Alphanumeric(_)
|
||||
| TokenType::Integer(_)
|
||||
| TokenType::Email(_)
|
||||
| TokenType::Float(_) => {
|
||||
word_count += 1;
|
||||
}
|
||||
TokenType::Url(_) | TokenType::UrlNoScheme(_) => {
|
||||
url_count += 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if word_count > 1 || url_count != 1 {
|
||||
return false;
|
||||
}
|
||||
|
||||
url_count = 0;
|
||||
|
||||
for token in html_tokens {
|
||||
if matches!(token, HtmlToken::StartTag { name, attributes } if *name == A && attributes.iter().any(|(k, _)| *k == HREF))
|
||||
{
|
||||
url_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
url_count == 1
|
||||
}
|
|
@ -7,10 +7,13 @@ use std::net::IpAddr;
|
|||
|
||||
use mail_auth::{dmarc::Policy, ArcOutput, DkimOutput, DmarcResult, IprevOutput, SpfOutput};
|
||||
use mail_parser::Message;
|
||||
use modules::html::HtmlToken;
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use store::ahash::AHashSet;
|
||||
|
||||
pub struct SpamFilterInput<'x> {
|
||||
pub message: &'x Message<'x>,
|
||||
pub span_id: u64,
|
||||
|
||||
// Sender authentication
|
||||
pub arc_result: &'x ArcOutput<'x>,
|
||||
|
@ -36,7 +39,7 @@ pub struct SpamFilterInput<'x> {
|
|||
pub env_rcpt_to: &'x [&'x str],
|
||||
}
|
||||
|
||||
pub struct SpamFilterOutput {
|
||||
pub struct SpamFilterOutput<'x> {
|
||||
pub ehlo_host: Hostname,
|
||||
pub iprev_ptr: Option<String>,
|
||||
|
||||
|
@ -51,6 +54,23 @@ pub struct SpamFilterOutput {
|
|||
|
||||
pub subject: String,
|
||||
pub subject_thread: String,
|
||||
pub subject_tokens: Vec<TokenType<&'x str>>,
|
||||
|
||||
pub text_parts: Vec<TextPart<'x>>,
|
||||
pub urls: HashSet<String>,
|
||||
}
|
||||
|
||||
pub enum TextPart<'x> {
|
||||
Plain {
|
||||
text_body: &'x str,
|
||||
tokens: Vec<TokenType<&'x str>>,
|
||||
},
|
||||
Html {
|
||||
html_tokens: Vec<HtmlToken>,
|
||||
text_body: String,
|
||||
tokens: Vec<TokenType<String>>,
|
||||
},
|
||||
None,
|
||||
}
|
||||
|
||||
pub struct SpamFilterResult {
|
||||
|
@ -59,7 +79,7 @@ pub struct SpamFilterResult {
|
|||
|
||||
pub struct SpamFilterContext<'x> {
|
||||
pub input: SpamFilterInput<'x>,
|
||||
pub output: SpamFilterOutput,
|
||||
pub output: SpamFilterOutput<'x>,
|
||||
pub result: SpamFilterResult,
|
||||
}
|
||||
|
||||
|
|
53
crates/spam-filter/src/modules/dnsbl.rs
Normal file
53
crates/spam-filter/src/modules/dnsbl.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
use std::time::Instant;
|
||||
|
||||
use common::{config::spamfilter::DnsblConfig, Server};
|
||||
use mail_auth::Error;
|
||||
use trc::SpamEvent;
|
||||
|
||||
pub async fn is_dnsbl(
|
||||
server: &Server,
|
||||
config: &DnsblConfig,
|
||||
item: &str,
|
||||
span_id: u64,
|
||||
) -> Option<String> {
|
||||
let time = Instant::now();
|
||||
let zone = server
|
||||
.eval_expr::<String, _>(&config.zone, &item, &config.id, span_id)
|
||||
.await?;
|
||||
let todo = "use proper event error";
|
||||
|
||||
match server.core.smtp.resolvers.dns.ipv4_lookup(&zone).await {
|
||||
Ok(result) => {
|
||||
let result = result.iter().map(|ip| ip.to_string()).collect::<Vec<_>>();
|
||||
|
||||
trc::event!(
|
||||
Spam(SpamEvent::Classify),
|
||||
Result = result
|
||||
.iter()
|
||||
.map(|ip| trc::Value::from(ip.clone()))
|
||||
.collect::<Vec<_>>(),
|
||||
Elapsed = time.elapsed()
|
||||
);
|
||||
|
||||
server.eval_if(&config.tags, &result, span_id).await
|
||||
}
|
||||
Err(Error::DnsRecordNotFound(_)) => {
|
||||
trc::event!(
|
||||
Spam(SpamEvent::Classify),
|
||||
Result = trc::Value::None,
|
||||
Elapsed = time.elapsed()
|
||||
);
|
||||
|
||||
None
|
||||
}
|
||||
Err(err) => {
|
||||
trc::event!(
|
||||
Spam(SpamEvent::Classify),
|
||||
Elapsed = time.elapsed(),
|
||||
CausedBy = err.to_string()
|
||||
);
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
382
crates/spam-filter/src/modules/html.rs
Normal file
382
crates/spam-filter/src/modules/html.rs
Normal file
|
@ -0,0 +1,382 @@
|
|||
use mail_parser::decoders::html::add_html_token;
|
||||
|
||||
/// A token produced by [`html_to_tokens`].
///
/// Tag and attribute names are stored as `u64` values in which the
/// lower-cased name bytes are packed little-endian, one byte per 8 bits, so
/// at most the first eight bytes of a name are kept.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum HtmlToken {
    /// An opening tag with its attributes; an attribute value is `None` when
    /// no non-empty value was found for it.
    StartTag {
        name: u64,
        attributes: Vec<(u64, Option<String>)>,
    },
    /// A closing tag.
    EndTag {
        name: u64,
    },
    /// The content of a comment.
    Comment {
        text: String,
    },
    /// Text found between tags.
    Text {
        text: String,
    },
}
|
||||
|
||||
// Packed names as produced by the tokenizer: the lower-case ASCII bytes of
// the name stored little-endian in a u64.

/// Packed name of the `a` tag.
pub(crate) const A: u64 = u64::from_le_bytes([b'a', 0, 0, 0, 0, 0, 0, 0]);

/// Packed name of the `href` attribute.
pub(crate) const HREF: u64 = u64::from_le_bytes([b'h', b'r', b'e', b'f', 0, 0, 0, 0]);
/// Packed name of the `src` attribute.
pub(crate) const SRC: u64 = u64::from_le_bytes([b's', b'r', b'c', 0, 0, 0, 0, 0]);
|
||||
|
||||
pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
|
||||
let input = input.as_bytes();
|
||||
let mut iter = input.iter().enumerate().peekable();
|
||||
let mut tags = vec![];
|
||||
|
||||
let mut is_token_start = true;
|
||||
let mut is_after_space = false;
|
||||
let mut is_new_line = true;
|
||||
|
||||
let mut token_start = 0;
|
||||
let mut token_end = 0;
|
||||
|
||||
let mut text = String::new();
|
||||
|
||||
while let Some((mut pos, &ch)) = iter.next() {
|
||||
match ch {
|
||||
b'<' => {
|
||||
if !is_token_start {
|
||||
add_html_token(
|
||||
&mut text,
|
||||
&input[token_start..token_end + 1],
|
||||
is_after_space,
|
||||
);
|
||||
is_after_space = false;
|
||||
is_token_start = true;
|
||||
}
|
||||
if !text.is_empty() {
|
||||
tags.push(HtmlToken::Text {
|
||||
text: std::mem::take(&mut text),
|
||||
});
|
||||
}
|
||||
|
||||
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace()) {
|
||||
pos += 1;
|
||||
iter.next();
|
||||
}
|
||||
|
||||
if matches!(input.get(pos + 1..pos + 4), Some(b"!--")) {
|
||||
let mut comment = Vec::new();
|
||||
let mut last_ch: u8 = 0;
|
||||
for (_, &ch) in iter.by_ref() {
|
||||
match ch {
|
||||
b'>' if comment.len() > 2
|
||||
&& matches!(comment.last(), Some(b'-'))
|
||||
&& matches!(comment.get(comment.len() - 2), Some(b'-')) =>
|
||||
{
|
||||
break;
|
||||
}
|
||||
b' ' | b'\t' | b'\r' | b'\n' => {
|
||||
if last_ch != b' ' {
|
||||
comment.push(b' ');
|
||||
} else {
|
||||
last_ch = b' ';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
comment.push(ch);
|
||||
}
|
||||
}
|
||||
last_ch = ch;
|
||||
}
|
||||
tags.push(HtmlToken::Comment {
|
||||
text: String::from_utf8(comment).unwrap_or_default(),
|
||||
});
|
||||
} else {
|
||||
let mut is_end_tag = false;
|
||||
loop {
|
||||
match iter.peek() {
|
||||
Some((_, &b'/')) => {
|
||||
is_end_tag = true;
|
||||
pos += 1;
|
||||
iter.next();
|
||||
}
|
||||
Some((_, ch)) if ch.is_ascii_whitespace() => {
|
||||
pos += 1;
|
||||
iter.next();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
let mut in_quote = false;
|
||||
|
||||
let mut key: u64 = 0;
|
||||
let mut shift = 0;
|
||||
|
||||
let mut tag = 0;
|
||||
let mut attributes = vec![];
|
||||
|
||||
'outer: while let Some((_, &ch)) = iter.next() {
|
||||
match ch {
|
||||
b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' if shift < 64 => {
|
||||
key |= (ch as u64) << shift;
|
||||
shift += 8;
|
||||
}
|
||||
b'A'..=b'Z' if shift < 64 => {
|
||||
key |= ((ch - b'A' + b'a') as u64) << shift;
|
||||
shift += 8;
|
||||
}
|
||||
b'>' if !in_quote => {
|
||||
if shift != 0 {
|
||||
if tag == 0 {
|
||||
tag = key;
|
||||
} else {
|
||||
attributes.push((key, None));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
b'"' => {
|
||||
in_quote = !in_quote;
|
||||
}
|
||||
b'=' if !in_quote => {
|
||||
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace())
|
||||
{
|
||||
iter.next();
|
||||
}
|
||||
|
||||
if shift != 0 {
|
||||
attributes.push((key, None));
|
||||
key = 0;
|
||||
shift = 0;
|
||||
}
|
||||
|
||||
let mut value = vec![];
|
||||
|
||||
for (_, &ch) in iter.by_ref() {
|
||||
match ch {
|
||||
b'>' if !in_quote => {
|
||||
if !value.is_empty() {
|
||||
attributes.last_mut().unwrap().1 =
|
||||
String::from_utf8(value)
|
||||
.unwrap_or_default()
|
||||
.into();
|
||||
}
|
||||
break 'outer;
|
||||
}
|
||||
b'"' => {
|
||||
if in_quote {
|
||||
in_quote = false;
|
||||
break;
|
||||
} else {
|
||||
in_quote = true;
|
||||
}
|
||||
}
|
||||
b' ' | b'\t' | b'\r' | b'\n' if !in_quote => {
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
value.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !value.is_empty() {
|
||||
attributes.last_mut().unwrap().1 =
|
||||
String::from_utf8(value).unwrap_or_default().into();
|
||||
}
|
||||
}
|
||||
b' ' | b'\t' | b'\r' | b'\n' => {
|
||||
if shift != 0 {
|
||||
if tag == 0 {
|
||||
tag = key;
|
||||
} else {
|
||||
attributes.push((key, None));
|
||||
}
|
||||
key = 0;
|
||||
shift = 0;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if tag != 0 {
|
||||
if is_end_tag {
|
||||
tags.push(HtmlToken::EndTag { name: tag });
|
||||
} else {
|
||||
tags.push(HtmlToken::StartTag {
|
||||
name: tag,
|
||||
attributes,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
b' ' | b'\t' | b'\r' | b'\n' => {
|
||||
if !is_token_start {
|
||||
add_html_token(
|
||||
&mut text,
|
||||
&input[token_start..token_end + 1],
|
||||
is_after_space && !is_new_line,
|
||||
);
|
||||
is_new_line = false;
|
||||
}
|
||||
is_after_space = true;
|
||||
is_token_start = true;
|
||||
continue;
|
||||
}
|
||||
b'&' if !is_token_start => {
|
||||
add_html_token(
|
||||
&mut text,
|
||||
&input[token_start..token_end + 1],
|
||||
is_after_space && !is_new_line,
|
||||
);
|
||||
is_new_line = false;
|
||||
is_token_start = true;
|
||||
is_after_space = false;
|
||||
}
|
||||
b';' if !is_token_start => {
|
||||
add_html_token(
|
||||
&mut text,
|
||||
&input[token_start..pos + 1],
|
||||
is_after_space && !is_new_line,
|
||||
);
|
||||
is_token_start = true;
|
||||
is_after_space = false;
|
||||
is_new_line = false;
|
||||
continue;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if is_token_start {
|
||||
token_start = pos;
|
||||
is_token_start = false;
|
||||
}
|
||||
token_end = pos;
|
||||
}
|
||||
|
||||
if !is_token_start {
|
||||
add_html_token(
|
||||
&mut text,
|
||||
&input[token_start..token_end + 1],
|
||||
is_after_space && !is_new_line,
|
||||
);
|
||||
}
|
||||
if !text.is_empty() {
|
||||
tags.push(HtmlToken::Text { text });
|
||||
}
|
||||
|
||||
tags
|
||||
}
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Tag and attribute names are compared as integers. The expected values
    // below correspond to the ASCII bytes of the name packed least-significant
    // byte first, e.g. "div" -> 100 | 105 << 8 | 118 << 16 = 7760228.

    /// Plain text without markup yields a single text token.
    #[test]
    fn test_html_to_tokens_text() {
        let input = "Hello, world!";
        let tokens = html_to_tokens(input);
        assert_eq!(
            tokens,
            vec![HtmlToken::Text {
                text: "Hello, world!".to_string()
            }]
        );
    }

    /// An opening tag without attributes yields a start tag with no attributes.
    #[test]
    fn test_html_to_tokens_start_tag() {
        let input = "<div>";
        let tokens = html_to_tokens(input);
        assert_eq!(
            tokens,
            vec![HtmlToken::StartTag {
                name: 7760228,
                attributes: vec![]
            }]
        );
    }

    /// A closing tag yields an end tag carrying the same numeric name.
    #[test]
    fn test_html_to_tokens_end_tag() {
        let input = "</div>";
        let tokens = html_to_tokens(input);
        assert_eq!(tokens, vec![HtmlToken::EndTag { name: 7760228 }]);
    }

    /// A comment is returned verbatim, without the surrounding angle brackets.
    #[test]
    fn test_html_to_tokens_comment() {
        let input = "<!-- This is a comment -->";
        let tokens = html_to_tokens(input);
        assert_eq!(
            tokens,
            vec![HtmlToken::Comment {
                text: "!-- This is a comment --".to_string()
            }]
        );
    }

    /// Nested tags, text and character entities are tokenized in document order.
    #[test]
    fn test_html_to_tokens_mixed() {
        // The quotes are written as `&quot;` entities: bare `"` characters
        // would terminate the Rust string literal.
        let input = "<div>Hello, <span>&quot; world &quot; </span>!</div>";
        let tokens = html_to_tokens(input);
        assert_eq!(
            tokens,
            vec![
                HtmlToken::StartTag {
                    name: 7760228,
                    attributes: vec![]
                },
                HtmlToken::Text {
                    text: "Hello,".to_string()
                },
                HtmlToken::StartTag {
                    name: 1851879539,
                    attributes: vec![]
                },
                HtmlToken::Text {
                    text: " \" world \"".to_string()
                },
                HtmlToken::EndTag { name: 1851879539 },
                HtmlToken::Text {
                    text: " !".to_string()
                },
                HtmlToken::EndTag { name: 7760228 }
            ]
        );
    }

    /// Attributes with quoted values, unquoted values and no value at all,
    /// as well as self-closing tags, are all reported on the start tag.
    #[test]
    fn test_html_to_tokens_with_attributes() {
        let input = r#"<input type="text" value="test"><single/><one attr/><a b=1 b c="123">"#;
        let tokens = html_to_tokens(input);
        assert_eq!(
            tokens,
            vec![
                HtmlToken::StartTag {
                    name: 500186508905,
                    attributes: vec![
                        (1701869940, Some("text".to_string())),
                        (435761734006, Some("test".to_string()))
                    ]
                },
                HtmlToken::StartTag {
                    name: 111516266162547,
                    attributes: vec![]
                },
                HtmlToken::StartTag {
                    name: 6647407,
                    attributes: vec![(1920234593, None)]
                },
                HtmlToken::StartTag {
                    name: 97,
                    attributes: vec![
                        (98, Some("1".to_string())),
                        (98, None),
                        (99, Some("123".to_string()))
                    ]
                }
            ]
        );
    }
}
|
|
@ -1 +1,4 @@
|
|||
pub mod dnsbl;
|
||||
pub mod html;
|
||||
pub mod remote_list;
|
||||
pub mod sanitize;
|
||||
|
|
199
crates/spam-filter/src/modules/remote_list.rs
Normal file
199
crates/spam-filter/src/modules/remote_list.rs
Normal file
|
@ -0,0 +1,199 @@
|
|||
use std::{
|
||||
collections::HashSet,
|
||||
io::{BufRead, BufReader},
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
use common::{
|
||||
config::{
|
||||
scripts::RemoteList,
|
||||
spamfilter::{RemoteListConfig, RemoteListFormat},
|
||||
},
|
||||
HttpLimitResponse, Server, USER_AGENT,
|
||||
};
|
||||
use mail_auth::flate2;
|
||||
|
||||
pub async fn is_in_remote_list(
|
||||
server: &Server,
|
||||
config: &RemoteListConfig,
|
||||
item: &str,
|
||||
span_id: u64,
|
||||
) -> bool {
|
||||
match is_in_remote_list_(server, config, item, span_id).await {
|
||||
Ok(result) => result,
|
||||
Err(err) => {
|
||||
let mut _lock = server.inner.data.remote_lists.write();
|
||||
let list = _lock
|
||||
.entry(config.id.clone())
|
||||
.or_insert_with(|| RemoteList {
|
||||
entries: HashSet::new(),
|
||||
expires: Instant::now(),
|
||||
});
|
||||
|
||||
if list.expires > Instant::now() {
|
||||
list.entries.contains(item)
|
||||
} else {
|
||||
list.expires = Instant::now() + config.retry;
|
||||
trc::error!(err.span_id(span_id));
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn is_in_remote_list_(
|
||||
server: &Server,
|
||||
config: &RemoteListConfig,
|
||||
item: &str,
|
||||
span_id: u64,
|
||||
) -> trc::Result<bool> {
|
||||
#[cfg(feature = "test_mode")]
|
||||
{
|
||||
if (config.url.contains("open") && item.contains("open"))
|
||||
|| (config.url.contains("tank") && item.contains("tank"))
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
let todo = "update RuntimeError with SpamEvent error";
|
||||
|
||||
match server.inner.data.remote_lists.read().get(&config.id) {
|
||||
Some(remote_list) if remote_list.expires < Instant::now() => {
|
||||
return Ok(remote_list.entries.contains(item))
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let response = reqwest::Client::builder()
|
||||
.timeout(config.timeout)
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.unwrap_or_default()
|
||||
.get(&config.url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, config.url.to_string())
|
||||
.details("Failed to build request")
|
||||
})?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let bytes = response
|
||||
.bytes_with_limit(config.max_size)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, config.url.to_string())
|
||||
.details("Failed to fetch resource")
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.ctx(trc::Key::Url, config.url.to_string())
|
||||
.details("Resource is too large")
|
||||
})?;
|
||||
|
||||
let reader: Box<dyn std::io::Read> = if config.url.ends_with(".gz") {
|
||||
Box::new(flate2::read::GzDecoder::new(&bytes[..]))
|
||||
} else {
|
||||
Box::new(&bytes[..])
|
||||
};
|
||||
|
||||
// Lock remote list for writing
|
||||
let mut _lock = server.inner.data.remote_lists.write();
|
||||
let list = _lock
|
||||
.entry(config.id.to_string())
|
||||
.or_insert_with(|| RemoteList {
|
||||
entries: HashSet::new(),
|
||||
expires: Instant::now(),
|
||||
});
|
||||
|
||||
// Make sure that the list is still expired
|
||||
if list.expires > Instant::now() {
|
||||
return Ok(list.entries.contains(item));
|
||||
}
|
||||
|
||||
for (pos, line) in BufReader::new(reader).lines().enumerate() {
|
||||
let line_ = line.map_err(|err| {
|
||||
trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.reason(err)
|
||||
.ctx(trc::Key::Url, config.url.to_string())
|
||||
.details("Failed to read line")
|
||||
})?;
|
||||
// Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
|
||||
if pos == 0 {
|
||||
list.entries.clear();
|
||||
}
|
||||
|
||||
match &config.format {
|
||||
RemoteListFormat::List => {
|
||||
let line = line_.trim();
|
||||
if !line.is_empty() {
|
||||
list.entries.insert(line.to_string());
|
||||
}
|
||||
}
|
||||
RemoteListFormat::Csv {
|
||||
column,
|
||||
separator,
|
||||
skip_first,
|
||||
} if pos > 0 || !*skip_first => {
|
||||
let mut in_quote = false;
|
||||
let mut col_num = 0;
|
||||
let mut entry = String::new();
|
||||
|
||||
for ch in line_.chars() {
|
||||
if ch != '"' {
|
||||
if ch == *separator && !in_quote {
|
||||
if col_num == *column {
|
||||
break;
|
||||
} else {
|
||||
col_num += 1;
|
||||
}
|
||||
} else if col_num == *column {
|
||||
entry.push(ch);
|
||||
if entry.len() > config.max_entry_size {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
in_quote = !in_quote;
|
||||
}
|
||||
}
|
||||
|
||||
if !entry.is_empty() {
|
||||
list.entries.insert(entry);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if list.entries.len() == config.max_entries {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
trc::event!(
|
||||
Spam(trc::SpamEvent::ListUpdated),
|
||||
Url = config.url.to_string(),
|
||||
Total = list.entries.len(),
|
||||
SpanId = span_id
|
||||
);
|
||||
|
||||
// Update expiration
|
||||
list.expires = Instant::now() + config.refresh;
|
||||
Ok(list.entries.contains(item))
|
||||
} else {
|
||||
trc::bail!(trc::SieveEvent::RuntimeError
|
||||
.into_err()
|
||||
.ctx(trc::Key::Code, response.status().as_u16())
|
||||
.ctx(trc::Key::Url, config.url.to_string())
|
||||
.details("Failed to fetch remote list"));
|
||||
}
|
||||
}
|
|
@ -4,7 +4,30 @@ use crate::{Email, Hostname};
|
|||
|
||||
impl Hostname {
|
||||
pub fn new(host: &str) -> Self {
|
||||
let fqdn = host.to_lowercase();
|
||||
let mut fqdn = host.to_lowercase();
|
||||
|
||||
// Decode punycode
|
||||
if fqdn.contains("xn--") {
|
||||
let mut decoded = String::with_capacity(fqdn.len());
|
||||
|
||||
for part in fqdn.split('.') {
|
||||
if !decoded.is_empty() {
|
||||
decoded.push('.');
|
||||
}
|
||||
|
||||
if let Some(puny) = part
|
||||
.strip_prefix("xn--")
|
||||
.and_then(idna::punycode::decode_to_string)
|
||||
{
|
||||
decoded.push_str(&puny);
|
||||
} else {
|
||||
decoded.push_str(part);
|
||||
}
|
||||
}
|
||||
|
||||
fqdn = decoded;
|
||||
}
|
||||
|
||||
let ip = fqdn
|
||||
.strip_prefix('[')
|
||||
.and_then(|ip| ip.strip_suffix(']'))
|
||||
|
@ -36,3 +59,9 @@ impl Email {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Hostname {
|
||||
pub fn sld_or_default(&self) -> &str {
|
||||
self.sld.as_deref().unwrap_or(self.fqdn.as_str())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
|
||||
# Bounce (DSN) detection.
# Reads `subject_lc`, which is defined outside this script (presumably the
# lowercased subject - confirm against the including script).
# Sets the tags t.SUBJ_BOUNCE_WORDS and t.BOUNCE.
if eval "(contains(subject_lc, 'delivery') &&
(contains(subject_lc, 'failed') ||
contains(subject_lc, 'report') ||
contains(subject_lc, 'status') ||
contains(subject_lc, 'warning'))) ||
(contains(subject_lc, 'failure') &&
(contains(subject_lc, 'delivery') ||
contains(subject_lc, 'notice') ||
contains(subject_lc, 'mail') )) ||
(contains(subject_lc, 'delivered') &&
(contains(subject_lc, 'couldn\\'t be') ||
contains(subject_lc, 'could not be') ||
contains(subject_lc, 'hasn\\'t been') ||
contains(subject_lc, 'has not been'))) ||
contains(subject_lc, 'returned mail') ||
contains(subject_lc, 'undeliverable') ||
contains(subject_lc, 'undelivered')" {
# Subject contains words or phrases typical for DSN
let "t.SUBJ_BOUNCE_WORDS" "1";
}

# t.BOUNCE is only considered for messages with an empty envelope sender
if eval "is_empty(envelope.from)" {
# multipart/report with a delivery-status or disposition-notification report type
if eval "eq_ignore_case(header.content-type, 'multipart/report') &&
( eq_ignore_case(header.content-type.attr.report-type, 'delivery-status') ||
eq_ignore_case(header.content-type.attr.report-type, 'disposition-notification'))" {
let "t.BOUNCE" "1";
} else {
let "from" "to_lowercase(header.from)";

# From contains 'mdaemon' and the X-MDDSN-Message header is not empty
if eval "contains(from, 'mdaemon') && !is_empty(header.X-MDDSN-Message)" {
let "t.BOUNCE" "1";
} elsif eval "contains(from, 'postmaster') || contains(from, 'mailer-daemon')" {
# Sent by postmaster / mailer-daemon: the bounce subject words are enough,
# otherwise look for a part of type message|text with subtype rfc822-headers|rfc822
if eval "t.SUBJ_BOUNCE_WORDS" {
let "t.BOUNCE" "1";
} else {
foreverypart {
if eval "(eq_ignore_case(header.content-type.type, 'message') ||
eq_ignore_case(header.content-type.type, 'text')) &&
(eq_ignore_case(header.content-type.subtype, 'rfc822-headers') ||
eq_ignore_case(header.content-type.subtype, 'rfc822'))" {
let "t.BOUNCE" "1";
break;
}
}
}
}
}
}
|
|
@ -1,93 +0,0 @@
|
|||
# Received header checks.
# Sets the t.RCVD_* tags plus t.FORGED_RCVD_TRAIL, t.PREVIOUSLY_DELIVERED,
# t.ONCE_RECEIVED and t.DIRECT_TO_MX. Reads `recipients`, which is defined
# outside this script.
let "rcvd_raw" "header.received[*].raw";
let "rcvd_count" "count(rcvd_raw)";

# Count received headers
# (each tag names the upper bound of its bucket; more than 12 headers sets no tag)
if eval "rcvd_count == 0" {
let "t.RCVD_COUNT_ZERO" "1";
} elsif eval "rcvd_count == 1" {
let "t.RCVD_COUNT_ONE" "1";
} elsif eval "rcvd_count == 2" {
let "t.RCVD_COUNT_TWO" "1";
} elsif eval "rcvd_count == 3" {
let "t.RCVD_COUNT_THREE" "1";
} elsif eval "rcvd_count <= 5" {
let "t.RCVD_COUNT_FIVE" "1";
} elsif eval "rcvd_count <= 7" {
let "t.RCVD_COUNT_SEVEN" "1";
} elsif eval "rcvd_count <= 12" {
let "t.RCVD_COUNT_TWELVE" "1";
}

# Received from an authenticated user
if eval "!is_empty(env.authenticated_as)" {
let "t.RCVD_VIA_SMTP_AUTH" "1";
}

# Received headers have non-ASCII characters
if eval "!is_ascii(rcvd_raw)" {
let "t.RCVD_ILLEGAL_CHARS" "1";
}

# Walk every Received header; `i` is incremented before use, so
# received_part() is called with indexes 1..=rcvd_count
let "i" "0";
let "tls_count" "0";
let "rcvd_from_ip" "0";
while "i < rcvd_count" {
let "i" "i + 1";
let "helo_domain" "received_part(i, 'from')";

# Check for a forged received trail
if eval "!t.FORGED_RCVD_TRAIL" {
let "iprev" "received_part(i, 'iprev')";

if eval "!is_empty(iprev) && !is_empty(helo_domain) && !eq_ignore_case(helo_domain, iprev)" {
let "t.FORGED_RCVD_TRAIL" "1";
}
}

if eval "!t.PREVIOUSLY_DELIVERED" {
let "for" "received_part(i, 'for')";
# Recipient appears on Received trail
if eval "!is_empty(for) && contains_ignore_case(recipients, for)" {
let "t.PREVIOUSLY_DELIVERED" "1";
}
}

if eval "!t.RCVD_HELO_USER && eq_ignore_case(helo_domain, 'user')" {
# Received: HELO contains 'user'
let "t.RCVD_HELO_USER" "1";
}

if eval "!is_empty(received_part(i, 'from.ip'))" {
# Received from an IP address rather than a FQDN
let "rcvd_from_ip" "rcvd_from_ip + 1";
}

if eval "!is_empty(received_part(i, 'tls'))" {
# Received with TLS
let "tls_count" "tls_count + 1";
}
}

if eval "rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && is_ip_addr(env.helo_domain))" {
# Has two or more Received headers containing bare IP addresses
let "t.RCVD_DOUBLE_IP_SPAM" "1";
}

if eval "rcvd_count == 0" {
# One received header in a message (currently zero but one header will be added later by the MTA)
let "t.ONCE_RECEIVED" "1";

# Message has been directly delivered from MUA to local MX
if eval "header.User-Agent.exists || header.X-Mailer.exists" {
let "t.DIRECT_TO_MX" "1";
}
}

# Received with TLS checks
# (exactly one of RCVD_TLS_ALL, RCVD_TLS_LAST or RCVD_NO_TLS_LAST is set)
if eval "rcvd_count > 0 && tls_count == rcvd_count && !is_empty(env.tls.version)" {
let "t.RCVD_TLS_ALL" "1";
} elsif eval "!is_empty(env.tls.version)" {
let "t.RCVD_TLS_LAST" "1";
} else {
let "t.RCVD_NO_TLS_LAST" "1";
}
|
|
@ -1,78 +0,0 @@
|
|||
|
||||
# Subject header checks.
# Reads `subject_lc` and `subject_clean`, which are defined outside this script
# (presumably the lowercased and the cleaned-up subject - confirm against the
# including script). Sets the t.SUBJ* / t.SUBJECT_* tags and t.FAKE_REPLY.
let "raw_subject_lc" "to_lowercase(header.subject.raw)";
# NOTE(review): `is_ascii_subject` is not read anywhere in this script
let "is_ascii_subject" "is_ascii(subject_lc)";

if eval "len(subject_clean) >= 10 && count(tokenize(subject_clean, 'words')) > 1 && is_uppercase(subject_clean)" {
# Subject contains mostly capital letters
let "t.SUBJ_ALL_CAPS" "1";
}

if eval "count_chars(subject_clean) > 200" {
# Subject is very long
let "t.LONG_SUBJ" "1";
}

if eval "!is_empty(tokenize(subject_lc, 'uri_strict'))" {
# Subject contains a URL
let "t.URL_IN_SUBJECT" "1";
}

if eval "!is_ascii(raw_subject_lc) && !env.param.smtputf8 && env.param.body != '8bitmime' && env.param.body != 'binarymime'" {
# Subject needs encoding
let "t.SUBJECT_NEEDS_ENCODING" "1";
}

if eval "!header.Subject.exists" {
# Missing subject header
let "t.MISSING_SUBJECT" "1";
} elsif eval "is_empty(trim(subject_lc))" {
# Subject is empty
let "t.EMPTY_SUBJECT" "1";
}

# The raw header carries encoded-word markers although the subject is plain ASCII
if eval "is_ascii(subject_lc) && contains(raw_subject_lc, '=?') && contains(raw_subject_lc, '?=')" {
if eval "contains(raw_subject_lc, '?q?')" {
# Subject header is unnecessarily encoded in quoted-printable
let "t.SUBJ_EXCESS_QP" "1";
} elsif eval "contains(raw_subject_lc, '?b?')" {
# Subject header is unnecessarily encoded in base64
let "t.SUBJ_EXCESS_BASE64" "1";
}
}

if eval "starts_with(subject_lc, 're:') && is_empty(header.in-reply-to) && is_empty(header.references)" {
# Fake reply
let "t.FAKE_REPLY" "1";
}

let "subject_lc_trim" "trim_end(subject_lc)";
if eval "subject_lc != subject_lc_trim" {
# Subject ends with space characters
let "t.SUBJECT_ENDS_SPACES" "1";
}

if eval "contains(subject_lc, '$') ||
contains(subject_lc, '€') ||
contains(subject_lc, '£') ||
contains(subject_lc, '¥')" {
# Subject contains currency symbols
let "t.SUBJECT_HAS_CURRENCY" "1";
}

if eval "ends_with(subject_lc_trim, '!')" {
# Subject ends with an exclamation mark
let "t.SUBJECT_ENDS_EXCLAIM" "1";
} elsif eval "ends_with(subject_lc_trim, '?')" {
# Subject ends with a question mark
let "t.SUBJECT_ENDS_QUESTION" "1";
}

if eval "contains(subject_lc_trim, '!')" {
# Subject contains an exclamation mark
let "t.SUBJECT_HAS_EXCLAIM" "1";
}

if eval "contains(subject_lc_trim, '?')" {
# Subject contains a question mark
let "t.SUBJECT_HAS_QUESTION" "1";
}
|
|
@ -1,125 +0,0 @@
|
|||
# URL checks.
# Reads `urls`, `body_urls`, `html_body_urls` and `text_body`, which are defined
# outside this script. Sets URL related tags such as t.URL_ONLY,
# t.REDIRECTOR_URL, t.HOMOGRAPH_URL, t.PHISHED_OPENPHISH and t.PHISHED_PHISHTANK.

# Exactly one URL and no words in the text body
if eval "(count(body_urls) == 1 || count(html_body_urls) == 1) && count(tokenize(text_body, 'words')) == 0" {
let "t.URL_ONLY" "1";
}

if eval "has_zwsp(urls)" {
let "t.ZERO_WIDTH_SPACE_URL" "1";
} elsif eval "has_obscured(urls)" {
let "t.R_SUSPICIOUS_URL" "1";
}

# Iterate over the URLs from the last one to the first one
let "i" "count(urls)";
while "i > 0" {
let "i" "i - 1";
let "url" "urls[i]";

# Skip non-URLs such as 'data:' and 'mailto:'
if eval "!contains(url, '://')" {
continue;
}

let "host" "uri_part(url, 'host')";

if eval "!is_empty(host)" {
let "is_ip" "is_ip_addr(host)";
let "host" "puny_decode(host)";
let "host_lc" "to_lowercase(host)";
let "host_sld" "domain_part(host_lc, 'sld')";

# Skip local and trusted domains
if eval "is_local_domain(DOMAIN_DIRECTORY, host_sld) || key_exists('spam-allow', host_sld)" {
continue;
}

# Host is listed in 'spam-redirect': follow the Location headers for as long
# as the target is itself listed there, giving up after 5 hops
if eval "!is_ip &&
(!t.REDIRECTOR_URL || !t.URL_REDIRECTOR_NESTED) &&
key_exists('spam-redirect', host_sld)" {
let "t.REDIRECTOR_URL" "1";
let "redir_count" "1";

while "redir_count <= 5" {
# Use a custom user-agent and a 3 second timeout
let "url_redirect" "http_header(url, 'Location', 'Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0', 3000)";
if eval "!is_empty(url_redirect)" {
# From here on the checks below apply to the redirect target
let "url" "url_redirect";
let "host" "uri_part(url, 'host')";
let "is_ip" "is_ip_addr(host)";
let "host" "puny_decode(host)";
let "host_lc" "to_lowercase(host)";
let "host_sld" "domain_part(host_lc, 'sld')";

if eval "!is_ip && key_exists('spam-redirect', host_sld)" {
let "redir_count" "redir_count + 1";
} else {
break;
}
} else {
break;
}
}

# The loop ended because the hop limit was exceeded
if eval "redir_count > 5" {
let "t.URL_REDIRECTOR_NESTED" "1";
}
}

let "url_lc" "to_lowercase(url)";
let "query" "uri_part(url_lc, 'path_query')";
if eval "!is_ip" {
if eval "!is_ascii(host)" {
# Non-ASCII host: flag it when its cure_text() form differs and resolves in DNS
let "host_cured" "cure_text(host)";
if eval "host_lc != host_cured && dns_exists(host_cured, 'ip')" {
let "t.HOMOGRAPH_URL" "1";
}

if eval "!is_single_script(host)" {
let "t.MIXED_CHARSET_URL" "1";
}
} else {
if eval "ends_with(host, 'googleusercontent.com') && starts_with(query, '/proxy/')" {
let "t.HAS_GUC_PROXY_URI" "1";
} elsif eval "ends_with(host, 'firebasestorage.googleapis.com')" {
let "t.HAS_GOOGLE_FIREBASE_URL" "1";
} elsif eval "starts_with(domain_part(host, 'sld'), 'google.') && contains(query, 'url?') " {
let "t.HAS_GOOGLE_REDIR" "1";
}
}

if eval "(contains(host_lc, 'ipfs.') || contains(query, '/ipfs')) && contains(query, '/qm')" {
# InterPlanetary File System (IPFS) gateway URL, likely malicious
let "t.HAS_IPFS_GATEWAY_URL" "1";
} elsif eval "ends_with(host_lc, '.onion')" {
let "t.HAS_ONION_URI" "1";
}
} else {
# URL is an ip address
let "t.R_SUSPICIOUS_URL" "1";
}

if eval "starts_with(query, '/wp-')" {
# Contains WordPress URIs
let "t.HAS_WP_URI" "1";
# NOTE(review): a single `|` is used below whereas the rest of the script uses `||` - confirm it is intended
if eval "starts_with(query, '/wp-content') | starts_with(query, '/wp-includes')" {
# URL that is pointing to a compromised WordPress installation
let "t.WP_COMPROMISED" "1";
}
}
if eval "contains(query, '/../') && !contains(query, '/well-known') && !contains(query, '/well_known')" {
# Message contains URI with a hidden path
let "t.URI_HIDDEN_PATH" "1";
}

# Phishing checks (refresh OpenPhish every 12 hours, PhishTank every 6 hours)
if eval "key_exists_http('https://openphish.com/feed.txt', url, [43200, 'list'])" {
let "t.PHISHED_OPENPHISH" "1";
}
if eval "key_exists_http('http://data.phishtank.com/data/online-valid.csv', url, [21600, 'csv', 1, ',', true])" {
let "t.PHISHED_PHISHTANK" "1";
}

} else {
# URL could not be parsed
let "t.R_SUSPICIOUS_URL" "1";
}
}
|
||||
|
Loading…
Add table
Reference in a new issue