Port Spam filter to Rust - part 3

2025-09-11 14:34:16 +08:00 · 2024-12-09 17:49:11 +01:00 · 2024-12-09 17:49:11 +01:00 · f0d84c8e68
commit f0d84c8e68
parent 4453dc8f3d
34 changed files with 1791 additions and 653 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -6447,16 +6447,21 @@ name = "spam-filter"
 version = "0.10.7"
 dependencies = [
 "common",
+ "decancer",
+ "hyper 1.5.1",
+ "idna 1.0.3",
 "mail-auth",
 "mail-builder",
 "mail-parser",
 "mail-send",
 "nlp",
 "psl",
+ "reqwest 0.12.9",
 "smtp-proto",
 "store",
 "tokio",
 "trc",
+ "unicode-security",
 "utils",
 ]

--- a/crates/common/src/config/spamfilter.rs
+++ b/crates/common/src/config/spamfilter.rs
@ -4,14 +4,65 @@
 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
 */

+use std::time::Duration;
+
 use utils::{config::Config, glob::GlobSet};

+use super::{if_block::IfBlock, Expression};
+
 #[derive(Debug, Clone, Default)]
+/// Spam filter configuration.
+///
+/// Groups the glob-based lookup lists with the remote list and DNSBL
+/// definitions (`remote_lists`, `dnsbls`).
 pub struct SpamFilterConfig {
    pub list_dmarc_allow: GlobSet,
    pub list_spf_dkim_allow: GlobSet,
    pub list_freemail_providers: GlobSet,
    pub list_disposable_providers: GlobSet,
+    pub list_trusted_domains: GlobSet,
+    pub list_url_redirectors: GlobSet,
+    /// One entry per configured remote list (see `RemoteListConfig`).
+    pub remote_lists: Vec<RemoteListConfig>,
+    /// One entry per configured DNSBL (see `DnsblConfig`).
+    pub dnsbls: Vec<DnsblConfig>,
+}
+
+/// Kind of item a list definition refers to.
+///
+/// Used as the `target` field of both `RemoteListConfig` and `DnsblConfig`.
+// NOTE(review): presumably selects which message-derived values (URLs, domains,
+// e-mail addresses, IP addresses) are checked against the list — confirm against
+// the code that consumes `target`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Target {
+    Url,
+    Domain,
+    Email,
+    Ip,
+    Ipv4,
+    Ipv6,
+}
+
+/// Settings for a single remote list.
+///
+/// The values quoted in the field docs below ("1 hour", "10MB", ...) are the
+/// author's original notes; nothing in this type enforces them.
+// NOTE(review): they look like intended defaults — confirm where the
+// configuration is parsed.
+#[derive(Debug, Clone)]
+pub struct RemoteListConfig {
+    /// Identifier of this list.
+    pub id: String,
+    /// URL of the list.
+    pub url: String,
+    /// Retry interval (original note: 1 hour); presumably applied after a failed
+    /// fetch — confirm.
+    pub retry: Duration,
+    /// Refresh interval (original note: 12h for openphish, 6h for phishtank).
+    pub refresh: Duration,
+    /// Request timeout (original note: 10s).
+    pub timeout: Duration,
+    /// Maximum size of the list (original note: 10MB); presumably in bytes — confirm.
+    pub max_size: usize,
+    /// Maximum number of entries kept (original note: 100000).
+    pub max_entries: usize,
+    /// Maximum size of a single entry (original note: 256); presumably in bytes — confirm.
+    pub max_entry_size: usize,
+    /// Layout of the list contents (see `RemoteListFormat`).
+    pub format: RemoteListFormat,
+    /// Kind of item this list contains (see `Target`).
+    pub target: Target,
+    /// Tag associated with this list; presumably added to the result on a match — confirm.
+    pub tag: String,
+}
+
+/// Settings for a single DNSBL.
+#[derive(Debug, Clone)]
+pub struct DnsblConfig {
+    /// Identifier of this DNSBL.
+    pub id: String,
+    /// Expression for the zone.
+    // NOTE(review): presumably evaluated per item to build the DNS name to
+    // query — confirm against the DNSBL lookup code.
+    pub zone: Expression,
+    /// Kind of item this DNSBL is consulted for (see `Target`).
+    pub target: Target,
+    /// Conditional block yielding the tag(s).
+    // NOTE(review): presumably evaluated against the lookup result — confirm.
+    pub tags: IfBlock,
+}
+
+/// Layout of a remote list's contents.
+// NOTE(review): the parser consuming this type is not part of this hunk. The
+// field meanings below mirror the `key_exists_http` parser removed from
+// `scripts/plugins/lookup.rs` in this same change — confirm they still hold.
+#[derive(Debug, Clone)]
+pub enum RemoteListFormat {
+    /// Plain list; the removed parser read one trimmed entry per line.
+    List,
+    /// Delimited text where one column holds the entry.
+    Csv {
+        /// Index of the column holding the entry (0-based in the removed parser).
+        column: u32,
+        /// Column separator character.
+        separator: char,
+        /// Whether the first line is skipped (e.g. a header row).
+        skip_first: bool,
+    },
 }

 impl SpamFilterConfig {
--- a/crates/common/src/expr/eval.rs
+++ b/crates/common/src/expr/eval.rs
@ -680,3 +680,21 @@ impl<'x> TryFrom<Variable<'x>> for StatusCode {
        }
    }
 }
+
+/// Lets a plain string slice act as the variable source of an expression.
+///
+/// Variable id `0` resolves to the string itself; any other id resolves to
+/// the integer `0`.
+impl<'x> ResolveVariable for &'x str {
+    fn resolve_variable(&self, variable: u32) -> Variable<'x> {
+        if variable == 0 {
+            Variable::String((*self).into())
+        } else {
+            Variable::Integer(0)
+        }
+    }
+}
+
+/// Lets a list of strings act as the variable source of an expression.
+///
+/// Variable id `0` resolves to an array holding one string per element, in
+/// order; any other id resolves to the integer `0`.
+impl ResolveVariable for Vec<String> {
+    fn resolve_variable(&self, variable: u32) -> Variable<'_> {
+        if variable != 0 {
+            return Variable::Integer(0);
+        }
+
+        Variable::Array(
+            self.iter()
+                .map(|item| Variable::String(item.into()))
+                .collect(),
+        )
+    }
+}
--- a/crates/common/src/scripts/functions/mod.rs
+++ b/crates/common/src/scripts/functions/mod.rs
@ -4,15 +4,15 @@
 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
 */

-mod array;
+pub mod array;
 mod email;
 mod header;
 pub mod html;
-mod image;
-mod misc;
+pub mod image;
+pub mod misc;
 pub mod text;
-mod unicode;
-mod url;
+pub mod unicode;
+pub mod url;

 use sieve::{runtime::Variable, FunctionMap};

--- a/crates/common/src/scripts/functions/unicode.rs
+++ b/crates/common/src/scripts/functions/unicode.rs
@ -43,7 +43,7 @@ pub fn fn_has_obscured<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    .into()
 }

-trait CharUtils {
+pub trait CharUtils {
    fn is_zwsp(&self) -> bool;
    fn is_obscured(&self) -> bool;
 }
--- a/crates/common/src/scripts/plugins/lookup.rs
+++ b/crates/common/src/scripts/plugins/lookup.rs
@ -4,19 +4,10 @@
 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
 */

-use std::{
-    collections::HashSet,
-    io::{BufRead, BufReader},
-    time::{Duration, Instant},
-};
-
-use mail_auth::flate2;
 use sieve::{runtime::Variable, FunctionMap};
 use store::{Deserialize, Value};

-use crate::{
-    config::scripts::RemoteList, scripts::into_sieve_value, HttpLimitResponse, USER_AGENT,
-};
+use crate::scripts::into_sieve_value;

 use super::PluginContext;

@ -32,10 +23,6 @@ pub fn register_set(plugin_id: u32, fnc_map: &mut FunctionMap) {
    fnc_map.set_external_function("key_set", plugin_id, 4);
 }

-pub fn register_remote(plugin_id: u32, fnc_map: &mut FunctionMap) {
-    fnc_map.set_external_function("key_exists_http", plugin_id, 3);
-}
-
 pub fn register_local_domain(plugin_id: u32, fnc_map: &mut FunctionMap) {
    fnc_map.set_external_function("is_local_domain", plugin_id, 2);
 }
@ -118,242 +105,6 @@ pub async fn exec_set(ctx: PluginContext<'_>) -> trc::Result<Variable> {
    .map(|_| true.into())
 }

-pub async fn exec_remote(ctx: PluginContext<'_>) -> trc::Result<Variable> {
-    match exec_remote_(&ctx).await {
-        Ok(result) => Ok(result),
-        Err(err) => {
-            // Something went wrong, try again in one hour
-            const RETRY: Duration = Duration::from_secs(3600);
-
-            let mut _lock = ctx.server.inner.data.remote_lists.write();
-            let list = _lock
-                .entry(ctx.arguments[0].to_string().to_string())
-                .or_insert_with(|| RemoteList {
-                    entries: HashSet::new(),
-                    expires: Instant::now(),
-                });
-
-            if list.expires > Instant::now() {
-                Ok(list
-                    .entries
-                    .contains(ctx.arguments[1].to_string().as_ref())
-                    .into())
-            } else {
-                list.expires = Instant::now() + RETRY;
-                Err(err)
-            }
-        }
-    }
-}
-
-const MAX_RESOURCE_SIZE: usize = 10 * 1024 * 1024;
-
-async fn exec_remote_(ctx: &PluginContext<'_>) -> trc::Result<Variable> {
-    let resource = ctx.arguments[0].to_string();
-    let item = ctx.arguments[1].to_string();
-
-    #[cfg(feature = "test_mode")]
-    {
-        if (resource.contains("open") && item.contains("open"))
-            || (resource.contains("tank") && item.contains("tank"))
-        {
-            return Ok(true.into());
-        }
-    }
-
-    if resource.is_empty() || item.is_empty() {
-        return Ok(false.into());
-    }
-
-    const TIMEOUT: Duration = Duration::from_secs(45);
-    const MAX_ENTRY_SIZE: usize = 256;
-    const MAX_ENTRIES: usize = 100000;
-
-    match ctx
-        .server
-        .inner
-        .data
-        .remote_lists
-        .read()
-        .get(resource.as_ref())
-    {
-        Some(remote_list) if remote_list.expires < Instant::now() => {
-            return Ok(remote_list.entries.contains(item.as_ref()).into())
-        }
-        _ => {}
-    }
-
-    enum Format {
-        List,
-        Csv {
-            column: u32,
-            separator: char,
-            skip_first: bool,
-        },
-    }
-
-    // Obtain parameters
-    let mut format = Format::List;
-    let mut expires = Duration::from_secs(12 * 3600);
-
-    if let Some(arr) = ctx.arguments[2].as_array() {
-        // Obtain expiration
-        match arr.first() {
-            Some(Variable::Integer(v)) if *v > 0 => {
-                expires = Duration::from_secs(*v as u64);
-            }
-            Some(Variable::Float(v)) if *v > 0.0 => {
-                expires = Duration::from_secs(*v as u64);
-            }
-            _ => (),
-        }
-
-        // Obtain list type
-        if matches!(arr.get(1), Some(Variable::String(list_type)) if list_type.eq_ignore_ascii_case("csv"))
-        {
-            format = Format::Csv {
-                column: arr.get(2).map(|v| v.to_integer()).unwrap_or_default() as u32,
-                separator: arr
-                    .get(3)
-                    .and_then(|v| v.to_string().chars().next())
-                    .unwrap_or(','),
-                skip_first: arr.get(4).map_or(false, |v| v.to_bool()),
-            };
-        }
-    }
-
-    let response = reqwest::Client::builder()
-        .timeout(TIMEOUT)
-        .user_agent(USER_AGENT)
-        .build()
-        .unwrap_or_default()
-        .get(resource.as_ref())
-        .send()
-        .await
-        .map_err(|err| {
-            trc::SieveEvent::RuntimeError
-                .into_err()
-                .reason(err)
-                .ctx(trc::Key::Url, resource.to_string())
-                .details("Failed to build request")
-        })?;
-
-    if response.status().is_success() {
-        let bytes = response
-            .bytes_with_limit(MAX_RESOURCE_SIZE)
-            .await
-            .map_err(|err| {
-                trc::SieveEvent::RuntimeError
-                    .into_err()
-                    .reason(err)
-                    .ctx(trc::Key::Url, resource.to_string())
-                    .details("Failed to fetch resource")
-            })?
-            .ok_or_else(|| {
-                trc::SieveEvent::RuntimeError
-                    .into_err()
-                    .ctx(trc::Key::Url, resource.to_string())
-                    .details("Resource is too large")
-            })?;
-
-        let reader: Box<dyn std::io::Read> = if resource.ends_with(".gz") {
-            Box::new(flate2::read::GzDecoder::new(&bytes[..]))
-        } else {
-            Box::new(&bytes[..])
-        };
-
-        // Lock remote list for writing
-        let mut _lock = ctx.server.inner.data.remote_lists.write();
-        let list = _lock
-            .entry(resource.to_string())
-            .or_insert_with(|| RemoteList {
-                entries: HashSet::new(),
-                expires: Instant::now(),
-            });
-
-        // Make sure that the list is still expired
-        if list.expires > Instant::now() {
-            return Ok(list.entries.contains(item.as_ref()).into());
-        }
-
-        for (pos, line) in BufReader::new(reader).lines().enumerate() {
-            let line_ = line.map_err(|err| {
-                trc::SieveEvent::RuntimeError
-                    .into_err()
-                    .reason(err)
-                    .ctx(trc::Key::Url, resource.to_string())
-                    .details("Failed to read line")
-            })?;
-            // Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
-            if pos == 0 {
-                list.entries.clear();
-            }
-
-            match &format {
-                Format::List => {
-                    let line = line_.trim();
-                    if !line.is_empty() {
-                        list.entries.insert(line.to_string());
-                    }
-                }
-                Format::Csv {
-                    column,
-                    separator,
-                    skip_first,
-                } if pos > 0 || !*skip_first => {
-                    let mut in_quote = false;
-                    let mut col_num = 0;
-                    let mut entry = String::new();
-
-                    for ch in line_.chars() {
-                        if ch != '"' {
-                            if ch == *separator && !in_quote {
-                                if col_num == *column {
-                                    break;
-                                } else {
-                                    col_num += 1;
-                                }
-                            } else if col_num == *column {
-                                entry.push(ch);
-                                if entry.len() > MAX_ENTRY_SIZE {
-                                    break;
-                                }
-                            }
-                        } else {
-                            in_quote = !in_quote;
-                        }
-                    }
-
-                    if !entry.is_empty() {
-                        list.entries.insert(entry);
-                    }
-                }
-                _ => (),
-            }
-
-            if list.entries.len() == MAX_ENTRIES {
-                break;
-            }
-        }
-
-        trc::event!(
-            Spam(trc::SpamEvent::ListUpdated),
-            Url = resource.as_ref().to_string(),
-            Total = list.entries.len(),
-        );
-
-        // Update expiration
-        list.expires = Instant::now() + expires;
-        Ok(list.entries.contains(item.as_ref()).into())
-    } else {
-        trc::bail!(trc::SieveEvent::RuntimeError
-            .into_err()
-            .ctx(trc::Key::Code, response.status().as_u16())
-            .ctx(trc::Key::Url, resource.to_string())
-            .details("Failed to fetch remote list"));
-    }
-}
-
 pub async fn exec_local_domain(ctx: PluginContext<'_>) -> trc::Result<Variable> {
    let domain = ctx.arguments[1].to_string();

--- a/crates/common/src/scripts/plugins/mod.rs
+++ b/crates/common/src/scripts/plugins/mod.rs
@ -31,13 +31,12 @@ pub struct PluginContext<'x> {
    pub arguments: Vec<Variable>,
 }

-const PLUGINS_REGISTER: [RegisterPluginFnc; 14] = [
+const PLUGINS_REGISTER: [RegisterPluginFnc; 13] = [
    query::register,
    exec::register,
    lookup::register,
    lookup::register_get,
    lookup::register_set,
-    lookup::register_remote,
    lookup::register_local_domain,
    dns::register,
    dns::register_exists,
@ -86,15 +85,14 @@ impl Core {
            2 => lookup::exec(ctx).await,
            3 => lookup::exec_get(ctx).await,
            4 => lookup::exec_set(ctx).await,
-            5 => lookup::exec_remote(ctx).await,
-            6 => lookup::exec_local_domain(ctx).await,
-            7 => dns::exec(ctx).await,
-            8 => dns::exec_exists(ctx).await,
-            9 => http::exec_header(ctx).await,
-            10 => headers::exec(ctx),
-            11 => text::exec_tokenize(ctx),
-            12 => text::exec_domain_part(ctx),
-            13 => llm_prompt::exec(ctx).await,
+            5 => lookup::exec_local_domain(ctx).await,
+            6 => dns::exec(ctx).await,
+            7 => dns::exec_exists(ctx).await,
+            8 => http::exec_header(ctx).await,
+            9 => headers::exec(ctx),
+            10 => text::exec_tokenize(ctx),
+            11 => text::exec_domain_part(ctx),
+            12 => llm_prompt::exec(ctx).await,
            _ => unreachable!(),
        };

--- a/crates/nlp/src/tokenizers/types.rs
+++ b/crates/nlp/src/tokenizers/types.rs
@ -727,6 +727,40 @@ impl<T> TokenType<T> {
    }
 }

+impl<T: AsRef<str>> TokenType<T> {
+    /// Returns the host portion of a URL or e-mail token, if the token has one.
+    ///
+    /// * `Url`: the text between `://` and the first `/`, with any `:port`
+    ///   suffix removed; `None` when the token contains no `://`.
+    /// * `UrlNoScheme`: the text before the first `/` (the whole token when
+    ///   there is no `/`).
+    /// * `Email`: the text after the last `@`; `None` when there is no `@`.
+    /// * Any other token: `None`.
+    pub fn hostname(&self) -> Option<&str> {
+        match self {
+            TokenType::Url(url) => url.as_ref().split_once("://").map(|(_, rest)| {
+                // Drop the path first and the port second, so the port is removed
+                // even when the URL has no path (e.g. `http://host:8080`).
+                let authority = rest.split_once('/').map_or(rest, |(a, _)| a);
+                authority.split_once(':').map_or(authority, |(host, _)| host)
+            }),
+            TokenType::UrlNoScheme(url) => {
+                // NOTE(review): unlike the `Url` arm, a `:port` suffix is kept
+                // here — confirm whether scheme-less tokens can carry a port.
+                let url = url.as_ref();
+                url.split_once('/').map_or(url, |(host, _)| host).into()
+            }
+            TokenType::Email(email) => email.as_ref().rsplit_once('@').map(|(_, domain)| domain),
+            _ => None,
+        }
+    }
+
+    /// Returns the result of `psl::domain_str` applied to [`Self::hostname`],
+    /// or `None` when the token has no hostname or the lookup yields nothing.
+    pub fn hostname_sld(&self) -> Option<&str> {
+        self.hostname().and_then(|host| psl::domain_str(host))
+    }
+
+    /// Returns the token as a trimmed, lowercased URL.
+    ///
+    /// * `Url`: returned as is (trimmed and lowercased).
+    /// * `UrlNoScheme`: prefixed with `https://`, unless `with_scheme_only` is
+    ///   `true`, in which case `None` is returned.
+    /// * Any other token: `None`.
+    pub fn url_lowercase(&self, with_scheme_only: bool) -> Option<String> {
+        match self {
+            TokenType::Url(url) => url.as_ref().trim().to_lowercase().into(),
+            TokenType::UrlNoScheme(url) if !with_scheme_only => {
+                // The prefix is already lowercase, so only the token needs folding.
+                format!("https://{}", url.as_ref().trim().to_lowercase()).into()
+            }
+            _ => None,
+        }
+    }
+}
+
 #[cfg(test)]
 mod test {

--- a/crates/spam-filter/Cargo.toml
+++ b/crates/spam-filter/Cargo.toml
@ -16,6 +16,11 @@ mail-builder = { version = "0.3", features = ["ludicrous_mode"] }
 mail-auth = { version = "0.5" }
 mail-send = { version = "0.4", default-features = false, features = ["cram-md5", "ring", "tls12"] }
 psl = "2"
+hyper = { version = "1.0.1", features = ["server", "http1", "http2"] }
+idna = "1.0"
+reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "http2", "stream"]}
+decancer = "3.0.1"
+unicode-security = "0.1.0"

 [features]
 test_mode = []
--- a/crates/spam-filter/src/analysis/bounce.rs
+++ b/crates/spam-filter/src/analysis/bounce.rs
@ -0,0 +1,107 @@
+use std::future::Future;
+
+use common::Server;
+use mail_parser::MimeHeaders;
+
+use crate::SpamFilterContext;
+
+/// Bounce (DSN) detection step of the spam filter.
+pub trait SpamFilterAnalyzeBounce: Sync + Send {
+    /// Inspects the message held in `ctx` and records bounce-related tags in
+    /// `ctx.result`. The returned future resolves to `()`.
+    fn spam_filter_analyze_bounce(
+        &self,
+        ctx: &mut SpamFilterContext<'_>,
+    ) -> impl Future<Output = ()> + Send;
+}
+
+impl SpamFilterAnalyzeBounce for Server {
+    /// Tags messages that look like bounces (DSNs).
+    ///
+    /// Adds `SUBJ_BOUNCE_WORDS` when the subject contains typical bounce
+    /// wording. When `env_from` is empty, additionally adds `BOUNCE` if any of
+    /// the following holds:
+    ///
+    /// * the message is `multipart/report` with a `report-type` of
+    ///   `delivery-status` or `disposition-notification`;
+    /// * the From local part contains `mdaemon` and an `X-MDDSN-Message`
+    ///   header is present;
+    /// * the From local part contains `postmaster` or `mailer-daemon` and
+    ///   either the subject has bounce wording or some part is a
+    ///   `message`/`text` part with subtype `rfc822` or `rfc822-headers`.
+    async fn spam_filter_analyze_bounce(&self, ctx: &mut SpamFilterContext<'_>) {
+        // Word classes seen in the subject (matched on whitespace-separated words).
+        let mut delivery = false;
+        let mut undelivered = false;
+        let mut failure = false;
+        let mut report = false;
+        let mut negation = false;
+
+        for word in ctx.output.subject.split_whitespace() {
+            match word {
+                "delivery" | "delivered" => delivery = true,
+                "undeliverable" | "undelivered" => undelivered = true,
+                "returned" | "failed" | "failure" | "warning" => failure = true,
+                "notice" | "report" | "status" | "mail" => report = true,
+                "couldn't" | "hasn't" | "not" => negation = true,
+                _ => (),
+            }
+        }
+
+        // Subject contains words or phrases typical for DSN
+        let has_bounce_words =
+            undelivered || (delivery && (failure || negation)) || (report && failure);
+        if has_bounce_words {
+            ctx.result.add_tag("SUBJ_BOUNCE_WORDS");
+        }
+
+        // The remaining checks only apply when `env_from` is empty.
+        if !ctx.input.env_from.is_empty() {
+            return;
+        }
+
+        // multipart/report carrying a delivery status or disposition notification
+        let is_report = ctx.input.message.content_type().map_or(false, |ct| {
+            ct.ctype().eq_ignore_ascii_case("multipart")
+                && ct
+                    .subtype()
+                    .map_or(false, |s| s.eq_ignore_ascii_case("report"))
+                && ct.attribute("report-type").map_or(false, |a| {
+                    a.eq_ignore_ascii_case("delivery-status")
+                        || a.eq_ignore_ascii_case("disposition-notification")
+                })
+        });
+
+        let is_bounce = if is_report {
+            true
+        } else {
+            let from_local = &ctx.output.from.email.local_part;
+
+            if from_local.contains("mdaemon")
+                && ctx.input.message.header("X-MDDSN-Message").is_some()
+            {
+                true
+            } else if from_local.contains("postmaster") || from_local.contains("mailer-daemon") {
+                // Either the subject already reads like a bounce, or the message
+                // embeds the original message / its headers.
+                has_bounce_words
+                    || ctx.input.message.parts.iter().any(|part| {
+                        part.content_type().map_or(false, |ct| {
+                            let subtype = ct.subtype().unwrap_or_default();
+                            let ctype = ct.ctype();
+
+                            (ctype.eq_ignore_ascii_case("message")
+                                || ctype.eq_ignore_ascii_case("text"))
+                                && (subtype.eq_ignore_ascii_case("rfc822-headers")
+                                    || subtype.eq_ignore_ascii_case("rfc822"))
+                        })
+                    })
+            } else {
+                false
+            }
+        };
+
+        if is_bounce {
+            // Message is a DSN
+            ctx.result.add_tag("BOUNCE");
+        }
+    }
+}
--- a/crates/spam-filter/src/analysis/date.rs
+++ b/crates/spam-filter/src/analysis/date.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use store::write::now;

 use crate::SpamFilterContext;
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeDate: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeDate for Core {
+impl SpamFilterAnalyzeDate for Server {
    async fn spam_filter_analyze_date(&self, ctx: &mut SpamFilterContext<'_>) {
        if let Some(date) = ctx.input.message.date() {
            let date = date.to_timestamp();
--- a/crates/spam-filter/src/analysis/dmarc.rs
+++ b/crates/spam-filter/src/analysis/dmarc.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_auth::{
    common::verify::VerifySignature, dmarc::Policy, DkimResult, DmarcResult, SpfResult,
 };
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeDmarc: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeDmarc for Core {
+impl SpamFilterAnalyzeDmarc for Server {
    async fn spam_filter_analyze_dmarc(&self, ctx: &mut SpamFilterContext<'_>) {
        ctx.result
            .add_tag(match ctx.input.spf_mail_from_result.result() {
@ -75,6 +75,7 @@ impl SpamFilterAnalyzeDmarc for Core {
        }

        if self
+            .core
            .spam
            .list_dmarc_allow
            .contains(&ctx.output.from.email.domain_part.fqdn)
@ -85,6 +86,7 @@ impl SpamFilterAnalyzeDmarc for Core {
                ctx.result.add_tag("BLOCKLIST_DMARC");
            }
        } else if self
+            .core
            .spam
            .list_spf_dkim_allow
            .contains(&ctx.output.from.email.domain_part.fqdn)
--- a/crates/spam-filter/src/analysis/ehlo.rs
+++ b/crates/spam-filter/src/analysis/ehlo.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;

 use crate::SpamFilterContext;

@ -11,7 +11,7 @@ pub trait SpamFilterAnalyzeEhlo: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeEhlo for Core {
+impl SpamFilterAnalyzeEhlo for Server {
    async fn spam_filter_analyze_ehlo(&self, ctx: &mut SpamFilterContext<'_>) {
        if let Some(ehlo_ip) = ctx.output.ehlo_host.ip {
            // Helo host is bare ip
@ -34,8 +34,8 @@ impl SpamFilterAnalyzeEhlo for Core {

            if matches!(
                (
-                    self.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
-                    self.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
+                    self.core.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
+                    self.core.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
                ),
                (Ok(false), Ok(false))
            ) {
--- a/crates/spam-filter/src/analysis/from.rs
+++ b/crates/spam-filter/src/analysis/from.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_parser::HeaderName;
 use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};

@ -26,7 +26,7 @@ const SERVICE_ACCOUNTS: [&str; 9] = [
 ];
 pub(crate) const TITLES: [&str; 7] = ["mr. ", "mrs. ", "ms. ", "dr. ", "prof. ", "rev. ", "hon. "];

-impl SpamFilterAnalyzeFrom for Core {
+impl SpamFilterAnalyzeFrom for Server {
    async fn spam_filter_analyze_from(&self, ctx: &mut SpamFilterContext<'_>) {
        let mut from_count = 0;
        let mut from_raw = b"".as_slice();
@ -96,12 +96,14 @@ impl SpamFilterAnalyzeFrom for Core {
                    is_www_dot_domain = true;
                }
                if self
+                    .core
                    .spam
                    .list_freemail_providers
                    .contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
                {
                    ctx.result.add_tag("FREEMAIL_FROM");
                } else if self
+                    .core
                    .spam
                    .list_disposable_providers
                    .contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
@ -238,7 +240,7 @@ impl SpamFilterAnalyzeFrom for Core {
                if SERVICE_ACCOUNTS.contains(&ctx.output.env_from_addr.local_part.as_str()) {
                    ctx.result.add_tag("ENVFROM_SERVICE_ACCT");
                }
-                if self.spam.list_freemail_providers.contains(
+                if self.core.spam.list_freemail_providers.contains(
                    ctx.output
                        .env_from_addr
                        .domain_part
@ -247,7 +249,7 @@ impl SpamFilterAnalyzeFrom for Core {
                        .unwrap_or_default(),
                ) {
                    ctx.result.add_tag("FREEMAIL_ENVFROM");
-                } else if self.spam.list_disposable_providers.contains(
+                } else if self.core.spam.list_disposable_providers.contains(
                    ctx.output
                        .env_from_addr
                        .domain_part
@ -261,9 +263,11 @@ impl SpamFilterAnalyzeFrom for Core {
                // Mail from no resolve to A or MX
                if matches!(
                    (
-                        self.dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
+                        self.core
+                            .dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
                            .await,
-                        self.dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
+                        self.core
+                            .dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
                            .await
                    ),
                    (Ok(false), Ok(false))
--- a/crates/spam-filter/src/analysis/headers.rs
+++ b/crates/spam-filter/src/analysis/headers.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_parser::HeaderName;
 use store::ahash::AHashSet;

@ -13,7 +13,7 @@ pub trait SpamFilterAnalyzeHeaders: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeHeaders for Core {
+impl SpamFilterAnalyzeHeaders for Server {
    async fn spam_filter_analyze_headers(&self, ctx: &mut SpamFilterContext<'_>) {
        let mut list_score = 0.0;
        let mut unique_headers = AHashSet::new();
--- a/crates/spam-filter/src/analysis/init.rs
+++ b/crates/spam-filter/src/analysis/init.rs
@ -1,9 +1,13 @@
-use common::Core;
-use mail_parser::{parsers::fields::thread::thread_name, HeaderName};
+use std::collections::HashSet;
+
+use common::Server;
+use mail_parser::{parsers::fields::thread::thread_name, HeaderName, PartType};
+use nlp::tokenizers::types::{TokenType, TypesTokenizer};

 use crate::{
+    modules::html::{html_to_tokens, HtmlToken, HREF, SRC},
    Email, Hostname, Recipient, SpamFilterContext, SpamFilterInput, SpamFilterOutput,
-    SpamFilterResult,
+    SpamFilterResult, TextPart,
 };

 pub trait SpamFilterInit {
@ -12,9 +16,9 @@ pub trait SpamFilterInit {

 const POSTMASTER_ADDRESSES: [&str; 3] = ["postmaster", "mailer-daemon", "root"];

-impl SpamFilterInit for Core {
+impl SpamFilterInit for Server {
    fn spam_filter_init<'x>(&self, input: SpamFilterInput<'x>) -> SpamFilterContext<'x> {
-        let mut subject = String::new();
+        let mut subject = "";
        let mut from = None;
        let mut reply_to = None;
        let mut recipients_to = Vec::new();
@ -67,7 +71,7 @@ impl SpamFilterInit for Core {
                        });
                }
                HeaderName::Subject => {
-                    subject = header.value().as_text().unwrap_or_default().to_lowercase();
+                    subject = header.value().as_text().unwrap_or_default();
                }
                HeaderName::From => {
                    from = header.value().as_address().and_then(|addrs| addrs.first());
@ -76,6 +80,143 @@ impl SpamFilterInit for Core {
            }
        }

+        // Tokenize subject
+        let subject_tokens = TypesTokenizer::new(subject)
+            .tokenize_numbers(false)
+            .tokenize_urls(true)
+            .tokenize_urls_without_scheme(true)
+            .tokenize_emails(true)
+            .map(|t| t.word)
+            .collect::<Vec<_>>();
+        let subject = subject.to_lowercase();
+
+        // Tokenize and convert text parts
+        let mut text_parts = Vec::new();
+        let mut text_parts_nested = Vec::new();
+        let mut message_stack = Vec::new();
+        let mut message_iter = input.message.parts.iter();
+
+        loop {
+            while let Some(part) = message_iter.next() {
+                let is_main_message = message_stack.is_empty();
+                let text_part = match &part.body {
+                    PartType::Text(text) => TextPart::Plain {
+                        text_body: text.as_ref(),
+                        tokens: TypesTokenizer::new(text.as_ref())
+                            .tokenize_numbers(false)
+                            .tokenize_urls(true)
+                            .tokenize_urls_without_scheme(true)
+                            .tokenize_emails(true)
+                            .map(|t| t.word)
+                            .collect::<Vec<_>>(),
+                    },
+                    PartType::Html(html) => {
+                        let html_tokens = html_to_tokens(html);
+                        let text_body_len = html_tokens
+                            .iter()
+                            .filter_map(|t| match t {
+                                HtmlToken::Text { text } => text.len().into(),
+                                _ => None,
+                            })
+                            .sum();
+                        let mut text_body = String::with_capacity(text_body_len);
+                        for token in &html_tokens {
+                            if let HtmlToken::Text { text } = token {
+                                if !text_body.is_empty()
+                                    && !text_body.ends_with(' ')
+                                    && text.starts_with(' ')
+                                {
+                                    text_body.push(' ');
+                                }
+                                text_body.push_str(text)
+                            }
+                        }
+
+                        TextPart::Html {
+                            tokens: TypesTokenizer::new(&text_body)
+                                .tokenize_numbers(false)
+                                .tokenize_urls(true)
+                                .tokenize_urls_without_scheme(true)
+                                .tokenize_emails(true)
+                                .map(|t| match t.word {
+                                    TokenType::Alphabetic(s) => {
+                                        TokenType::Alphabetic(s.to_string())
+                                    }
+                                    TokenType::Alphanumeric(s) => {
+                                        TokenType::Alphanumeric(s.to_string())
+                                    }
+                                    TokenType::Integer(s) => TokenType::Integer(s.to_string()),
+                                    TokenType::Other(s) => TokenType::Other(s),
+                                    TokenType::Punctuation(s) => TokenType::Punctuation(s),
+                                    TokenType::Space => TokenType::Space,
+                                    TokenType::Url(s) => TokenType::Url(s.to_string()),
+                                    TokenType::UrlNoScheme(s) => {
+                                        TokenType::UrlNoScheme(s.to_string())
+                                    }
+                                    TokenType::UrlNoHost(s) => TokenType::UrlNoHost(s.to_string()),
+                                    TokenType::IpAddr(s) => TokenType::IpAddr(s.to_string()),
+                                    TokenType::Email(s) => TokenType::Email(s.to_string()),
+                                    TokenType::Float(s) => TokenType::Float(s.to_string()),
+                                })
+                                .collect::<Vec<_>>(),
+                            html_tokens,
+                            text_body,
+                        }
+                    }
+                    PartType::Message(message) => {
+                        message_stack.push(message_iter);
+                        message_iter = message.parts.iter();
+                        TextPart::None
+                    }
+                    _ => TextPart::None,
+                };
+
+                if is_main_message {
+                    text_parts.push(text_part);
+                } else if !matches!(text_part, TextPart::None) {
+                    text_parts_nested.push(text_part);
+                }
+            }
+
+            if let Some(iter) = message_stack.pop() {
+                message_iter = iter;
+            } else {
+                break;
+            }
+        }
+        text_parts.extend(text_parts_nested);
+
+        // Extract URLs
+        let mut urls: HashSet<String> =
+            HashSet::from_iter(subject_tokens.iter().filter_map(|t| t.url_lowercase(false)));
+        for part in &text_parts {
+            match part {
+                TextPart::Plain { tokens, .. } => {
+                    urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
+                }
+                TextPart::Html {
+                    html_tokens,
+                    tokens,
+                    ..
+                } => {
+                    for token in html_tokens {
+                        if let HtmlToken::StartTag { attributes, .. } = token {
+                            for (attr, value) in attributes {
+                                match value {
+                                    Some(value) if [HREF, SRC].contains(attr) => {
+                                        urls.insert(value.trim().to_lowercase());
+                                    }
+                                    _ => {}
+                                }
+                            }
+                        }
+                    }
+                    urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
+                }
+                TextPart::None => {}
+            }
+        }
+
        let env_from_addr = Email::new(input.env_from);
        SpamFilterContext {
            output: SpamFilterOutput {
@ -101,9 +242,12 @@ impl SpamFilterInit for Core {
                reply_to,
                subject_thread: thread_name(&subject).to_string(),
                subject,
+                subject_tokens,
                recipients_to,
                recipients_cc,
                recipients_bcc,
+                text_parts,
+                urls,
            },
            input,
            result: SpamFilterResult {
@ -117,7 +261,7 @@ impl SpamFilterInit for Core {

 use std::future::Future;

-use common::Core;
+use common::Server;

 use crate::SpamFilterContext;

@ -128,7 +272,7 @@ pub trait SpamFilterAnalyze!: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyze! for Core {
+impl SpamFilterAnalyze! for Server {
    async fn spam_filter_analyze_*(&self, ctx: &mut SpamFilterContext<'_>) {
        todo!()
    }
--- a/crates/spam-filter/src/analysis/iprev.rs
+++ b/crates/spam-filter/src/analysis/iprev.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_auth::IprevResult;

 use crate::SpamFilterContext;
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeIpRev: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeIpRev for Core {
+impl SpamFilterAnalyzeIpRev for Server {
    async fn spam_filter_analyze_iprev(&self, ctx: &mut SpamFilterContext<'_>) {
        match &ctx.input.iprev_result.result {
            IprevResult::TempError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),
--- a/crates/spam-filter/src/analysis/messageid.rs
+++ b/crates/spam-filter/src/analysis/messageid.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_parser::HeaderName;

 use crate::{Hostname, SpamFilterContext};
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeMid: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeMid for Core {
+impl SpamFilterAnalyzeMid for Server {
    async fn spam_filter_analyze_message_id(&self, ctx: &mut SpamFilterContext<'_>) {
        let mid_raw = ctx
            .input
--- a/crates/spam-filter/src/analysis/mod.rs
+++ b/crates/spam-filter/src/analysis/mod.rs
@ -4,6 +4,7 @@ use mail_parser::{parsers::MessageStream, Header};

 use crate::{Recipient, SpamFilterInput, SpamFilterOutput, SpamFilterResult};

+pub mod bounce;
 pub mod date;
 pub mod dmarc;
 pub mod ehlo;
@ -12,8 +13,11 @@ pub mod headers;
 pub mod init;
 pub mod iprev;
 pub mod messageid;
+pub mod received;
 pub mod recipient;
 pub mod replyto;
+pub mod subject;
+pub mod url;

 impl SpamFilterInput<'_> {
    pub fn header_as_address(&self, header: &Header<'_>) -> Option<Cow<'_, str>> {
@ -27,7 +31,7 @@ impl SpamFilterInput<'_> {
    }
 }

-impl SpamFilterOutput {
+impl SpamFilterOutput<'_> {
    pub fn all_recipients(&self) -> impl Iterator<Item = &Recipient> {
        self.recipients_to
            .iter()
--- a/crates/spam-filter/src/analysis/received.rs
+++ b/crates/spam-filter/src/analysis/received.rs
@ -0,0 +1,146 @@
+use std::future::Future;
+
+use common::Server;
+use mail_parser::{HeaderName, Host};
+
+use crate::SpamFilterContext;
+
+/// Spam-filter analysis step for the message's `Received` trail.
+pub trait SpamFilterAnalyzeReceived: Sync + Send {
+    /// Examines the headers of `ctx.input.message` and records the resulting
+    /// tags on `ctx.result`. The returned future resolves to `()`.
+    fn spam_filter_analyze_received(
+        &self,
+        ctx: &mut SpamFilterContext<'_>,
+    ) -> impl Future<Output = ()> + Send;
+}
+
+impl SpamFilterAnalyzeReceived for Server {
+    /// Inspects every `Received` header (plus `User-Agent` / `X-Mailer`) of the
+    /// message and tags `ctx.result` accordingly.
+    ///
+    /// Per-header checks: non-ASCII bytes in the raw header, headers that fail
+    /// to parse, a HELO of `user`, a HELO that differs from the recorded PTR
+    /// name, and `for` addresses that match one of the message recipients.
+    /// Once all headers are seen, the accumulated counters drive the bare-IP,
+    /// SMTP-auth, TLS and hop-count tags.
+    async fn spam_filter_analyze_received(&self, ctx: &mut SpamFilterContext<'_>) {
+        let mut rcvd_count = 0;
+        let mut rcvd_from_ip = 0;
+        let mut tls_count = 0;
+        let mut has_ua = false;
+
+        for header in ctx.input.message.headers() {
+            match &header.name {
+                HeaderName::Received => {
+                    if !ctx
+                        .input
+                        .message
+                        .raw_message()
+                        .get(header.offset_start..header.offset_end)
+                        .unwrap_or_default()
+                        .is_ascii()
+                    {
+                        // Received headers have non-ASCII characters
+                        ctx.result.add_tag("RCVD_ILLEGAL_CHARS");
+                    }
+
+                    if let Some(received) = header.value().as_received() {
+                        let helo_domain = received.helo();
+                        let ip_rev = received.from_iprev();
+
+                        if matches!(&helo_domain, Some(Host::Name(hostname)) if hostname.eq_ignore_ascii_case("user"))
+                        {
+                            // HELO domain is "user"
+                            ctx.result.add_tag("RCVD_HELO_USER");
+                        } else if let (Some(Host::Name(helo_domain)), Some(ip_rev)) =
+                            (helo_domain, ip_rev)
+                        {
+                            if helo_domain.to_lowercase() != ip_rev.to_lowercase() {
+                                // HELO domain does not match PTR record
+                                ctx.result.add_tag("FORGED_RCVD_TRAIL");
+                            }
+                        }
+
+                        if let Some(delivered_for) = received.for_().map(|s| s.to_lowercase()) {
+                            if ctx
+                                .output
+                                .all_recipients()
+                                .any(|r| r.email.address == delivered_for)
+                            {
+                                // Recipient appears on Received trail
+                                ctx.result.add_tag("PREVIOUSLY_DELIVERED");
+                            }
+                        }
+
+                        if received.from_ip().is_some() {
+                            // Received from an IP address rather than a FQDN
+                            rcvd_from_ip += 1;
+                        }
+
+                        if received.tls_version().is_some() {
+                            // Received with TLS
+                            tls_count += 1;
+                        }
+                    } else {
+                        // Received header is not RFC 5322 compliant
+                        ctx.result.add_tag("RCVD_UNPARSABLE");
+                    }
+
+                    // Unparsable headers still count as a hop
+                    rcvd_count += 1;
+                }
+                HeaderName::Other(name) => {
+                    // Remember whether the message carries a user-agent style header
+                    if !has_ua
+                        && (name.eq_ignore_ascii_case("User-Agent")
+                            || name.eq_ignore_ascii_case("X-Mailer"))
+                    {
+                        has_ua = true;
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        if rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && ctx.output.ehlo_host.ip.is_some()) {
+            // Has two or more Received headers containing bare IP addresses
+            // (the current session's EHLO counts as one when it is an IP)
+            ctx.result.add_tag("RCVD_DOUBLE_IP_SPAM");
+        }
+
+        // Received from an authenticated user
+        if !ctx.input.authenticated_as.is_empty() {
+            ctx.result.add_tag("RCVD_VIA_SMTP_AUTH");
+        }
+
+        // Received with TLS checks
+        if rcvd_count > 0 && rcvd_count == tls_count && !ctx.input.tls_version.is_empty() {
+            ctx.result.add_tag("RCVD_TLS_ALL");
+        } else if !ctx.input.tls_version.is_empty() {
+            ctx.result.add_tag("RCVD_TLS_LAST");
+        } else {
+            ctx.result.add_tag("RCVD_NO_TLS_LAST");
+        }
+
+        match rcvd_count {
+            0 => {
+                ctx.result.add_tag("RCVD_COUNT_ZERO");
+
+                // One received header in a message (currently zero
+                // but one header will be added later by the MTA)
+                ctx.result.add_tag("ONCE_RECEIVED");
+
+                // Message has been directly delivered from MUA to local MX
+                if has_ua {
+                    ctx.result.add_tag("DIRECT_TO_MX");
+                }
+            }
+            1 => {
+                ctx.result.add_tag("RCVD_COUNT_ONE");
+            }
+            2 => {
+                ctx.result.add_tag("RCVD_COUNT_TWO");
+            }
+            3 => {
+                ctx.result.add_tag("RCVD_COUNT_THREE");
+            }
+            4 | 5 => {
+                ctx.result.add_tag("RCVD_COUNT_FIVE");
+            }
+            6 | 7 => {
+                ctx.result.add_tag("RCVD_COUNT_SEVEN");
+            }
+            8..=12 => {
+                ctx.result.add_tag("RCVD_COUNT_TWELVE");
+            }
+            // More than twelve hops: no count tag is emitted
+            _ => {}
+        }
+    }
+}
--- a/crates/spam-filter/src/analysis/recipient.rs
+++ b/crates/spam-filter/src/analysis/recipient.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::{scripts::functions::text::levenshtein_distance, Core};
+use common::{scripts::functions::text::levenshtein_distance, Server};
 use mail_parser::HeaderName;
 use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
 use store::ahash::HashSet;
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeRecipient: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeRecipient for Core {
+impl SpamFilterAnalyzeRecipient for Server {
    async fn spam_filter_analyze_recipient(&self, ctx: &mut SpamFilterContext<'_>) {
        let mut to_raw = b"".as_slice();
        let mut cc_raw = b"".as_slice();
@ -191,7 +191,7 @@ impl SpamFilterAnalyzeRecipient for Core {

            // Check for freemail or disposable domains
            if let Some(domain) = rcpt.email.domain_part.sld.as_deref() {
-                if self.spam.list_freemail_providers.contains(domain) {
+                if self.core.spam.list_freemail_providers.contains(domain) {
                    if ctx
                        .output
                        .recipients_to
@ -202,7 +202,7 @@ impl SpamFilterAnalyzeRecipient for Core {
                    } else {
                        ctx.result.add_tag("FREEMAIL_CC");
                    }
-                } else if self.spam.list_disposable_providers.contains(domain) {
+                } else if self.core.spam.list_disposable_providers.contains(domain) {
                    if ctx
                        .output
                        .recipients_to
--- a/crates/spam-filter/src/analysis/replyto.rs
+++ b/crates/spam-filter/src/analysis/replyto.rs
@ -1,6 +1,6 @@
 use std::future::Future;

-use common::Core;
+use common::Server;
 use mail_parser::HeaderName;

 use crate::SpamFilterContext;
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeReplyTo: Sync + Send {
    ) -> impl Future<Output = ()> + Send;
 }

-impl SpamFilterAnalyzeReplyTo for Core {
+impl SpamFilterAnalyzeReplyTo for Server {
    async fn spam_filter_analyze_reply_to(&self, ctx: &mut SpamFilterContext<'_>) {
        let mut reply_to_raw = b"".as_slice();
        let mut is_from_list = false;
@ -104,7 +104,12 @@ impl SpamFilterAnalyzeReplyTo for Core {
                .sld
                .as_deref()
                .unwrap_or_default();
-            if self.spam.list_freemail_providers.contains(reply_to_sld) {
+            if self
+                .core
+                .spam
+                .list_freemail_providers
+                .contains(reply_to_sld)
+            {
                ctx.result.add_tag("FREEMAIL_REPLYTO");
                let from_domain_sld = ctx
                    .output
@ -115,11 +120,20 @@ impl SpamFilterAnalyzeReplyTo for Core {
                    .as_deref()
                    .unwrap_or_default();
                if reply_to_sld != from_domain_sld
-                    && self.spam.list_freemail_providers.contains(from_domain_sld)
+                    && self
+                        .core
+                        .spam
+                        .list_freemail_providers
+                        .contains(from_domain_sld)
                {
                    ctx.result.add_tag("FREEMAIL_REPLYTO_NEQ_FROM_DOM");
                }
-            } else if self.spam.list_disposable_providers.contains(reply_to_sld) {
+            } else if self
+                .core
+                .spam
+                .list_disposable_providers
+                .contains(reply_to_sld)
+            {
                ctx.result.add_tag("DISPOSABLE_REPLYTO");
            }

--- a/crates/spam-filter/src/analysis/subject.rs
+++ b/crates/spam-filter/src/analysis/subject.rs
@ -0,0 +1,190 @@
+use std::future::Future;
+
+use common::Server;
+use mail_parser::HeaderName;
+use nlp::tokenizers::types::TokenType;
+use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
+
+use crate::{Email, SpamFilterContext};
+
+/// Spam-filter analysis step for the message's `Subject` header.
+pub trait SpamFilterAnalyzeSubject: Sync + Send {
+    /// Examines the subject of `ctx.input.message` (raw bytes, decoded text and
+    /// tokens held in `ctx.output`) and records the resulting tags on
+    /// `ctx.result`. The returned future resolves to `()`.
+    fn spam_filter_analyze_subject(
+        &self,
+        ctx: &mut SpamFilterContext<'_>,
+    ) -> impl Future<Output = ()> + Send;
+}
+
+impl SpamFilterAnalyzeSubject for Server {
+    /// Analyzes the `Subject` header and tags `ctx.result` accordingly.
+    ///
+    /// Steps, in order:
+    /// 1. locate the raw `Subject` bytes and note whether the message carries
+    ///    `In-Reply-To` / `References` (returns early with `MISSING_SUBJECT`
+    ///    when no raw subject is found);
+    /// 2. scan `subject_thread` character by character for punctuation,
+    ///    currency symbols, letter case and trailing whitespace;
+    /// 3. look for URLs, recipient addresses and recipient domains among the
+    ///    subject tokens;
+    /// 4. validate the raw header encoding (8-bit data, needless RFC 2047
+    ///    encoding);
+    /// 5. flag subjects that start with `re:` on messages that are not replies.
+    async fn spam_filter_analyze_subject(&self, ctx: &mut SpamFilterContext<'_>) {
+        let mut subject_raw = b"".as_slice();
+        let mut is_reply = false;
+
+        for header in ctx.input.message.headers() {
+            match &header.name {
+                HeaderName::Subject => {
+                    subject_raw = ctx
+                        .input
+                        .message
+                        .raw_message()
+                        .get(header.offset_start..header.offset_end)
+                        .unwrap_or_default();
+                }
+                HeaderName::InReplyTo | HeaderName::References => {
+                    is_reply = true;
+                }
+                _ => {}
+            }
+        }
+
+        if subject_raw.is_empty() {
+            // Missing subject header
+            ctx.result.add_tag("MISSING_SUBJECT");
+            return;
+        }
+
+        let mut word_count = 0;
+        let mut upper_count = 0;
+        let mut lower_count = 0;
+
+        // `last_ch` is the previous character, `last_ch_trimmed` the previous
+        // non-whitespace one; both start as a space so that an empty subject
+        // is detected below.
+        let mut last_ch = ' ';
+        let mut last_ch_trimmed = ' ';
+        let mut is_ascii = true;
+
+        for ch in ctx.output.subject_thread.chars() {
+            if !ch.is_whitespace() {
+                // A non-whitespace character following whitespace starts a word
+                if last_ch.is_whitespace() {
+                    word_count += 1;
+                }
+
+                match ch {
+                    '$' | '€' | '£' | '¥' | '₹' | '₽' | '₿' => {
+                        ctx.result.add_tag("SUBJECT_HAS_CURRENCY");
+                    }
+                    '!' => {
+                        ctx.result.add_tag("SUBJECT_HAS_EXCLAIM");
+                    }
+                    '?' => {
+                        ctx.result.add_tag("SUBJECT_HAS_QUESTION");
+                    }
+                    _ => {
+                        if ch.is_alphabetic() {
+                            if ch.is_uppercase() {
+                                upper_count += 1;
+                            } else {
+                                lower_count += 1;
+                            }
+                        }
+                    }
+                }
+
+                last_ch_trimmed = ch;
+            }
+
+            if !ch.is_ascii() {
+                is_ascii = false;
+            }
+
+            last_ch = ch;
+        }
+
+        match last_ch_trimmed {
+            '?' => {
+                ctx.result.add_tag("SUBJECT_ENDS_QUESTION");
+            }
+            '!' => {
+                ctx.result.add_tag("SUBJECT_ENDS_EXCLAIM");
+            }
+            _ => {}
+        }
+
+        if last_ch.is_whitespace() {
+            if last_ch_trimmed.is_whitespace() {
+                // Subject is empty
+                ctx.result.add_tag("EMPTY_SUBJECT");
+            } else {
+                // Subject ends with whitespace
+                ctx.result.add_tag("SUBJECT_ENDS_SPACES");
+            }
+        }
+
+        if ctx.output.subject_thread.len() >= 10
+            && word_count > 1
+            && upper_count > 2
+            && lower_count == 0
+        {
+            // Subject contains mostly capital letters
+            ctx.result.add_tag("SUBJ_ALL_CAPS");
+        }
+
+        if ctx.output.subject_thread.len() > 200 {
+            // Subject is very long
+            ctx.result.add_tag("LONG_SUBJ");
+        }
+
+        for token in &ctx.output.subject_tokens {
+            match token {
+                TokenType::Url(_) => {
+                    // Subject contains URL
+                    ctx.result.add_tag("URL_IN_SUBJECT");
+                }
+                TokenType::Email(address) => {
+                    // Subject contains recipient
+                    let email = Email::new(address);
+                    if ctx.output.env_to_addr.contains(&email)
+                        || ctx
+                            .output
+                            .all_recipients()
+                            .any(|r| r.email.address == email.address)
+                    {
+                        ctx.result.add_tag("RCPT_IN_SUBJECT");
+                    }
+                    // E-mail tokens are not checked against recipient domains
+                    continue;
+                }
+                _ => {}
+            }
+
+            if let Some(hostname) = token.hostname_sld() {
+                let hostname = Some(hostname.to_lowercase());
+                if ctx
+                    .output
+                    .all_recipients()
+                    .any(|r| r.email.domain_part.sld == hostname)
+                {
+                    ctx.result.add_tag("RCPT_DOMAIN_IN_SUBJECT");
+                }
+            }
+        }
+
+        // Validate encoding
+        let subject_raw_utf8 = std::str::from_utf8(subject_raw);
+        if !subject_raw.is_ascii() {
+            if (ctx.input.env_from_flags
+                & (MAIL_SMTPUTF8 | MAIL_BODY_8BITMIME | MAIL_BODY_BINARYMIME))
+                == 0
+            {
+                ctx.result.add_tag("SUBJECT_NEEDS_ENCODING");
+            }
+
+            if subject_raw_utf8.is_err() {
+                ctx.result.add_tag("INVALID_SUBJECT_8BIT");
+            }
+        }
+
+        // Validate unnecessary encoding
+        let subject_raw_utf8 = subject_raw_utf8.unwrap_or_default();
+        if is_ascii && subject_raw_utf8.contains("=?") && subject_raw_utf8.contains("?=") {
+            if subject_raw_utf8.contains("?q?") || subject_raw_utf8.contains("?Q?") {
+                // Subject header is unnecessarily encoded in quoted-printable
+                ctx.result.add_tag("SUBJ_EXCESS_QP");
+            } else if subject_raw_utf8.contains("?b?") || subject_raw_utf8.contains("?B?") {
+                // Subject header is unnecessarily encoded in base64
+                ctx.result.add_tag("SUBJ_EXCESS_BASE64");
+            }
+        }
+
+        // Match the reply prefix without regard to ASCII case so that "Re:" and
+        // "RE:" are caught whether or not the stored subject was lowercased.
+        let has_reply_prefix = matches!(
+            ctx.output.subject.trim().get(..3),
+            Some(prefix) if prefix.eq_ignore_ascii_case("re:")
+        );
+        if !is_reply && has_reply_prefix {
+            // Subject is not a reply but starts with "re:"
+            ctx.result.add_tag("FAKE_REPLY");
+        }
+    }
+}
--- a/crates/spam-filter/src/analysis/url.rs
+++ b/crates/spam-filter/src/analysis/url.rs
@ -0,0 +1,324 @@
+use std::{borrow::Cow, future::Future, time::Duration};
+
+use common::Server;
+use common::{config::spamfilter::Target, scripts::functions::unicode::CharUtils};
+use hyper::{
+    header::{HeaderName, LOCATION},
+    Uri,
+};
+use nlp::tokenizers::types::TokenType;
+use reqwest::redirect::Policy;
+use unicode_security::MixedScript;
+
+use crate::modules::dnsbl::is_dnsbl;
+use crate::modules::remote_list::is_in_remote_list;
+use crate::{
+    modules::html::{HtmlToken, A, HREF},
+    Hostname, SpamFilterContext, TextPart,
+};
+
+/// Spam-filter analysis step for the URLs collected from the message.
+pub trait SpamFilterAnalyzeUrl: Sync + Send {
+    /// Examines `ctx.output.urls` and the message's text parts and records the
+    /// resulting tags on `ctx.result`. The returned future resolves to `()`.
+    fn spam_filter_analyze_url(
+        &self,
+        ctx: &mut SpamFilterContext<'_>,
+    ) -> impl Future<Output = ()> + Send;
+}
+
+impl SpamFilterAnalyzeUrl for Server {
+    /// Analyzes the URLs found in the message and tags `ctx.result` accordingly.
+    ///
+    /// First, body parts that consist of a single URL yield `URL_ONLY`. Then
+    /// each entry of `ctx.output.urls` is checked for obscured or zero-width
+    /// characters, parsed, and skipped when its host is trusted or local.
+    /// URLs on a known redirector are followed (bounded by `MAX_REDIRECTS`) and
+    /// the final destination is analyzed alongside the original URL:
+    /// homographs, mixed scripts, well-known abuse patterns, suspicious paths,
+    /// remote lists and DNSBLs targeting URLs.
+    async fn spam_filter_analyze_url(&self, ctx: &mut SpamFilterContext<'_>) {
+        for (part_id, part) in ctx.output.text_parts.iter().enumerate() {
+            if ctx.input.message.text_body.contains(&part_id)
+                || ctx.input.message.html_body.contains(&part_id)
+            {
+                let is_single = match part {
+                    TextPart::Plain { tokens, .. } => is_single_url(tokens),
+                    TextPart::Html {
+                        html_tokens,
+                        tokens,
+                        ..
+                    } => is_single_html_url(html_tokens, tokens),
+                    TextPart::None => false,
+                };
+
+                if is_single {
+                    ctx.result.add_tag("URL_ONLY");
+                    break;
+                }
+            }
+        }
+
+        for url in &ctx.output.urls {
+            for ch in url.chars() {
+                if ch.is_zwsp() {
+                    ctx.result.add_tag("ZERO_WIDTH_SPACE_URL");
+                }
+
+                if ch.is_obscured() {
+                    ctx.result.add_tag("R_SUSPICIOUS_URL");
+                }
+            }
+
+            // Skip non-URLs such as 'data:' and 'mailto:'
+            if !url.contains("://") {
+                continue;
+            }
+
+            // Parse url
+            let url_parsed = match url.parse::<Uri>() {
+                Ok(url) if url.host().is_some() => url,
+                _ => {
+                    // URL could not be parsed
+                    ctx.result.add_tag("R_SUSPICIOUS_URL");
+                    continue;
+                }
+            };
+            let host = Hostname::new(url_parsed.host().unwrap());
+            let host_sld = host.sld_or_default();
+
+            // Skip local and trusted domains
+            if self.core.spam.list_trusted_domains.contains(host_sld)
+                || self
+                    .core
+                    .storage
+                    .directory
+                    .is_local_domain(host_sld)
+                    .await
+                    .unwrap_or_default()
+            {
+                continue;
+            }
+
+            // Check for redirectors
+            let mut redirected_urls = Vec::new();
+            if host.ip.is_none() && self.core.spam.list_url_redirectors.contains(host_sld) {
+                ctx.result.add_tag("REDIRECTOR_URL");
+
+                // Number of redirector-to-redirector hops tolerated before the
+                // chain is reported as nested; also bounds the HTTP requests.
+                const MAX_REDIRECTS: u32 = 5;
+
+                let mut redirect_count = 0;
+                let mut url_redirect = Cow::Borrowed(url);
+
+                while redirect_count <= MAX_REDIRECTS {
+                    match http_get_header(url_redirect.as_ref(), LOCATION, Duration::from_secs(5))
+                        .await
+                    {
+                        Ok(Some(location)) => {
+                            if let Ok(location_parsed) = location.parse::<Uri>() {
+                                let host =
+                                    Hostname::new(location_parsed.host().unwrap_or_default());
+                                if self
+                                    .core
+                                    .spam
+                                    .list_url_redirectors
+                                    .contains(host.sld_or_default())
+                                {
+                                    // Destination is another redirector: follow it
+                                    url_redirect = Cow::Owned(location);
+                                    redirect_count += 1;
+                                    continue;
+                                } else {
+                                    // Final destination: analyze it unless it
+                                    // is already part of the message's URLs
+                                    let location = location.to_lowercase();
+                                    if !ctx.output.urls.contains(&location) {
+                                        redirected_urls.push((
+                                            Cow::Owned(location),
+                                            location_parsed,
+                                            host,
+                                        ));
+                                    }
+                                }
+                            }
+                        }
+                        Ok(None) => {}
+                        Err(err) => {
+                            trc::error!(err.span_id(ctx.input.span_id));
+                        }
+                    }
+                    break;
+                }
+
+                if redirect_count > MAX_REDIRECTS {
+                    ctx.result.add_tag("URL_REDIRECTOR_NESTED");
+                }
+            }
+
+            for (url, url_parsed, host) in [(Cow::Borrowed(url), url_parsed, host)]
+                .into_iter()
+                .chain(redirected_urls.into_iter())
+            {
+                let query = url_parsed
+                    .path_and_query()
+                    .map(|pq| pq.as_str())
+                    .unwrap_or_default();
+                if host.ip.is_none() {
+                    if !host.fqdn.is_ascii() {
+                        if let Ok(cured_host) =
+                            decancer::cure(&host.fqdn, decancer::Options::default())
+                        {
+                            let cured_host = cured_host.to_string();
+                            if cured_host != host.fqdn
+                                && matches!(self.core.dns_exists_ip(&cured_host).await, Ok(true))
+                            {
+                                ctx.result.add_tag("HOMOGRAPH_URL");
+                            }
+
+                            if !cured_host.is_single_script() {
+                                ctx.result.add_tag("MIXED_CHARSET_URL");
+                            }
+                        }
+                    } else if matches!(host.sld.as_deref(), Some("googleusercontent.com"))
+                        && query.starts_with("/proxy/")
+                    {
+                        ctx.result.add_tag("HAS_GUC_PROXY_URI");
+                    } else if host.fqdn.ends_with("firebasestorage.googleapis.com") {
+                        ctx.result.add_tag("HAS_GOOGLE_FIREBASE_URL");
+                    } else if host.sld_or_default().starts_with("google.") && query.contains("url?")
+                    {
+                        ctx.result.add_tag("HAS_GOOGLE_REDIR");
+                    }
+
+                    if host.fqdn.contains("ipfs.")
+                        || (query.contains("/ipfs") && query.contains("/qm"))
+                    {
+                        // InterPlanetary File System (IPFS) gateway URL, likely malicious
+                        ctx.result.add_tag("HAS_IPFS_GATEWAY_URL");
+                    } else if host.fqdn.ends_with(".onion") {
+                        // Onion URL
+                        ctx.result.add_tag("HAS_ONION_URI");
+                    }
+                } else {
+                    // URL is an ip address
+                    ctx.result.add_tag("R_SUSPICIOUS_URL");
+                }
+
+                if query.starts_with("/wp-") {
+                    // Contains WordPress URIs
+                    ctx.result.add_tag("HAS_WP_URI");
+
+                    if query.starts_with("/wp-content") || query.starts_with("/wp-includes") {
+                        // URL that is pointing to a compromised WordPress installation
+                        ctx.result.add_tag("WP_COMPROMISED");
+                    }
+                }
+
+                if query.contains("/../")
+                    && !query.contains("/.well-known")
+                    && !query.contains("/.well_known")
+                {
+                    // Message contains URI with a hidden path
+                    ctx.result.add_tag("URI_HIDDEN_PATH");
+                }
+
+                // Check remote lists
+                for remote in &self.core.spam.remote_lists {
+                    if matches!(remote.target, Target::Url)
+                        && is_in_remote_list(self, remote, url.as_ref(), ctx.input.span_id).await
+                    {
+                        ctx.result.add_tag(&remote.tag);
+                    }
+                }
+
+                // Check DNSBL
+                for dnsbl in &self.core.spam.dnsbls {
+                    if matches!(dnsbl.target, Target::Url) {
+                        if let Some(tag) =
+                            is_dnsbl(self, dnsbl, url.as_ref(), ctx.input.span_id).await
+                        {
+                            ctx.result.add_tag(tag);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Issues a GET request to `url` without following redirects and returns the
+/// value of the response header `header`, if present and representable as a
+/// string.
+///
+/// Certificate validation is disabled and the whole request is bounded by
+/// `timeout`. Failures to build the client or to send the request are
+/// reported as `SieveEvent::RuntimeError` errors.
+async fn http_get_header(
+    url: &str,
+    header: HeaderName,
+    timeout: Duration,
+) -> trc::Result<Option<String>> {
+    let client = reqwest::Client::builder()
+        .user_agent("Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0")
+        .timeout(timeout)
+        .redirect(Policy::none())
+        .danger_accept_invalid_certs(true)
+        .build()
+        .map_err(|err| {
+            trc::SieveEvent::RuntimeError
+                .into_err()
+                .reason(err)
+                .details("Failed to build request")
+        })?;
+
+    let response = client.get(url).send().await.map_err(|err| {
+        trc::SieveEvent::RuntimeError
+            .into_err()
+            .reason(err)
+            .details("Failed to send request")
+    })?;
+
+    // Header values that are not valid visible ASCII are treated as absent
+    let value = response
+        .headers()
+        .get(header)
+        .and_then(|raw| raw.to_str().ok())
+        .map(|text| text.to_string());
+
+    Ok(value)
+}
+
+/// Returns `true` when `tokens` contain exactly one URL (with or without a
+/// scheme) and at most one word-like token (alphabetic, alphanumeric,
+/// integer, float or e-mail). All other token types are ignored.
+fn is_single_url<T: AsRef<str>>(tokens: &[TokenType<T>]) -> bool {
+    let (urls, words) = tokens
+        .iter()
+        .fold((0usize, 0usize), |(urls, words), token| match token {
+            TokenType::Url(_) | TokenType::UrlNoScheme(_) => (urls + 1, words),
+            TokenType::Alphabetic(_)
+            | TokenType::Alphanumeric(_)
+            | TokenType::Integer(_)
+            | TokenType::Email(_)
+            | TokenType::Float(_) => (urls, words + 1),
+            _ => (urls, words),
+        });
+
+    words <= 1 && urls == 1
+}
+
+/// HTML counterpart of the single-URL check.
+///
+/// The text `tokens` must contain exactly one URL and at most one word-like
+/// token, and `html_tokens` must contain exactly one `<a>` start tag that
+/// carries an `href` attribute.
+fn is_single_html_url<T: AsRef<str>>(html_tokens: &[HtmlToken], tokens: &[TokenType<T>]) -> bool {
+    let mut text_urls = 0usize;
+    let mut text_words = 0usize;
+
+    for token in tokens {
+        match token {
+            TokenType::Url(_) | TokenType::UrlNoScheme(_) => text_urls += 1,
+            TokenType::Alphabetic(_)
+            | TokenType::Alphanumeric(_)
+            | TokenType::Integer(_)
+            | TokenType::Email(_)
+            | TokenType::Float(_) => text_words += 1,
+            _ => {}
+        }
+    }
+
+    // The text itself must already look like a lone URL
+    if text_urls != 1 || text_words > 1 {
+        return false;
+    }
+
+    // Count anchors that actually link somewhere
+    let anchor_count = html_tokens
+        .iter()
+        .filter(|&token| {
+            matches!(token, HtmlToken::StartTag { name, attributes } if *name == A && attributes.iter().any(|(k, _)| *k == HREF))
+        })
+        .count();
+
+    anchor_count == 1
+}
--- a/crates/spam-filter/src/lib.rs
+++ b/crates/spam-filter/src/lib.rs
@ -7,10 +7,13 @@ use std::net::IpAddr;

 use mail_auth::{dmarc::Policy, ArcOutput, DkimOutput, DmarcResult, IprevOutput, SpfOutput};
 use mail_parser::Message;
+use modules::html::HtmlToken;
+use nlp::tokenizers::types::TokenType;
 use store::ahash::AHashSet;

 pub struct SpamFilterInput<'x> {
    pub message: &'x Message<'x>,
+    pub span_id: u64,

    // Sender authentication
    pub arc_result: &'x ArcOutput<'x>,
@ -36,7 +39,7 @@ pub struct SpamFilterInput<'x> {
    pub env_rcpt_to: &'x [&'x str],
 }

-pub struct SpamFilterOutput {
+pub struct SpamFilterOutput<'x> {
    pub ehlo_host: Hostname,
    pub iprev_ptr: Option<String>,

@ -51,6 +54,23 @@ pub struct SpamFilterOutput {

    pub subject: String,
    pub subject_thread: String,
+    pub subject_tokens: Vec<TokenType<&'x str>>,
+
+    pub text_parts: Vec<TextPart<'x>>,
+    pub urls: HashSet<String>,
+}
+
+/// Text content of one message part, stored in `SpamFilterOutput::text_parts`.
+pub enum TextPart<'x> {
+    /// A plain-text part: the borrowed body and the tokens borrowed from it.
+    Plain {
+        text_body: &'x str,
+        tokens: Vec<TokenType<&'x str>>,
+    },
+    /// An HTML part: its HTML tokens plus an owned text body with owned tokens.
+    // NOTE(review): `text_body` is presumably the text extracted from the
+    // HTML — confirm against the code that builds this variant.
+    Html {
+        html_tokens: Vec<HtmlToken>,
+        text_body: String,
+        tokens: Vec<TokenType<String>>,
+    },
+    /// A part for which no text was extracted.
+    None,
 }

 pub struct SpamFilterResult {
@ -59,7 +79,7 @@ pub struct SpamFilterResult {

 pub struct SpamFilterContext<'x> {
    pub input: SpamFilterInput<'x>,
-    pub output: SpamFilterOutput,
+    pub output: SpamFilterOutput<'x>,
    pub result: SpamFilterResult,
 }

--- a/crates/spam-filter/src/modules/dnsbl.rs
+++ b/crates/spam-filter/src/modules/dnsbl.rs
@ -0,0 +1,53 @@
+use std::time::Instant;
+
+use common::{config::spamfilter::DnsblConfig, Server};
+use mail_auth::Error;
+use trc::SpamEvent;
+
+/// Checks `item` against the DNS blocklist described by `config`.
+///
+/// The zone name to query is obtained by evaluating `config.zone` with `item`
+/// as input. The name is resolved with an IPv4 lookup and the returned
+/// addresses (rendered as strings) are passed to `config.tags`; whatever that
+/// evaluation yields is returned.
+///
+/// Returns `None` when the zone expression produces no value, when the DNS
+/// record does not exist, or when the lookup fails. Each lookup outcome is
+/// traced as a `SpamEvent::Classify` event carrying `span_id`, so it can be
+/// correlated with the message being processed.
+pub async fn is_dnsbl(
+    server: &Server,
+    config: &DnsblConfig,
+    item: &str,
+    span_id: u64,
+) -> Option<String> {
+    let time = Instant::now();
+    let zone = server
+        .eval_expr::<String, _>(&config.zone, &item, &config.id, span_id)
+        .await?;
+    // NOTE(review): unused binding kept from the original, presumably so the
+    // compiler warning acts as a reminder for the pending work.
+    let todo = "use proper event error";
+
+    match server.core.smtp.resolvers.dns.ipv4_lookup(&zone).await {
+        Ok(result) => {
+            let result = result.iter().map(|ip| ip.to_string()).collect::<Vec<_>>();
+
+            trc::event!(
+                Spam(SpamEvent::Classify),
+                Result = result
+                    .iter()
+                    .map(|ip| trc::Value::from(ip.clone()))
+                    .collect::<Vec<_>>(),
+                Elapsed = time.elapsed(),
+                SpanId = span_id
+            );
+
+            server.eval_if(&config.tags, &result, span_id).await
+        }
+        Err(Error::DnsRecordNotFound(_)) => {
+            // Not listed: this is the expected outcome for clean items.
+            trc::event!(
+                Spam(SpamEvent::Classify),
+                Result = trc::Value::None,
+                Elapsed = time.elapsed(),
+                SpanId = span_id
+            );
+
+            None
+        }
+        Err(err) => {
+            // Any other resolver failure is traced and treated as "not listed".
+            trc::event!(
+                Spam(SpamEvent::Classify),
+                Elapsed = time.elapsed(),
+                CausedBy = err.to_string(),
+                SpanId = span_id
+            );
+
+            None
+        }
+    }
+}
--- a/crates/spam-filter/src/modules/html.rs
+++ b/crates/spam-filter/src/modules/html.rs
@ -0,0 +1,382 @@
+use mail_parser::decoders::html::add_html_token;
+
+/// A single token produced by [`html_to_tokens`].
+///
+/// Tag and attribute names are not stored as strings: `html_to_tokens` packs
+/// the lowercased ASCII name into a `u64`, one byte per character with the
+/// first character in the lowest byte (e.g. `"a"` is `b'a' as u64`).
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum HtmlToken {
+    /// An opening tag with its packed name and its attributes in source order.
+    StartTag {
+        name: u64,
+        /// `(packed attribute name, value)`; the value is `None` when the
+        /// attribute has no (or an empty) value.
+        attributes: Vec<(u64, Option<String>)>,
+    },
+    /// A closing tag (`</name>`), identified by its packed name.
+    EndTag {
+        name: u64,
+    },
+    /// A comment; as emitted by `html_to_tokens` the text still includes the
+    /// leading `!--` and trailing `--`.
+    Comment {
+        text: String,
+    },
+    /// A run of text found between tags.
+    Text {
+        text: String,
+    },
+}
+
+// Packed names as produced by `html_to_tokens`: one byte per character,
+// first character in the lowest byte.
+
+/// Packed name of the `a` tag.
+pub(crate) const A: u64 = b'a' as u64;
+
+/// Packed name of the `href` attribute.
+pub(crate) const HREF: u64 =
+    (b'h' as u64) | (b'r' as u64) << 8 | (b'e' as u64) << 16 | (b'f' as u64) << 24;
+/// Packed name of the `src` attribute.
+pub(crate) const SRC: u64 = (b's' as u64) | (b'r' as u64) << 8 | (b'c' as u64) << 16;
+
+/// Splits an HTML document into a flat list of [`HtmlToken`]s.
+///
+/// * Text between tags is accumulated word by word through `mail_parser`'s
+///   `add_html_token` helper and emitted as [`HtmlToken::Text`].
+/// * `<!-- ... -->` is emitted as [`HtmlToken::Comment`]; runs of whitespace
+///   inside the comment are collapsed to a single space and the text keeps
+///   its leading `!--` and trailing `--`.
+/// * Any other `<...>` is emitted as [`HtmlToken::StartTag`] or
+///   [`HtmlToken::EndTag`]. Tag and attribute names are lowercased and packed
+///   into a `u64`, one byte per character starting at the lowest byte;
+///   characters beyond the eighth are ignored. Tags without a name are
+///   dropped.
+///
+/// The input is untrusted (message bodies), so malformed markup must never
+/// panic: an attribute value that has no preceding attribute name
+/// (e.g. `<a ="x">`) is silently discarded.
+pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
+    let input = input.as_bytes();
+    let mut iter = input.iter().enumerate().peekable();
+    let mut tags = vec![];
+
+    // State of the text run currently being scanned.
+    let mut is_token_start = true;
+    let mut is_after_space = false;
+    let mut is_new_line = true;
+
+    let mut token_start = 0;
+    let mut token_end = 0;
+
+    let mut text = String::new();
+
+    while let Some((mut pos, &ch)) = iter.next() {
+        match ch {
+            b'<' => {
+                // Flush the pending word and any accumulated text before the tag.
+                if !is_token_start {
+                    add_html_token(
+                        &mut text,
+                        &input[token_start..token_end + 1],
+                        is_after_space,
+                    );
+                    is_after_space = false;
+                    is_token_start = true;
+                }
+                if !text.is_empty() {
+                    tags.push(HtmlToken::Text {
+                        text: std::mem::take(&mut text),
+                    });
+                }
+
+                // Skip whitespace right after '<', keeping `pos` in sync.
+                while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace()) {
+                    pos += 1;
+                    iter.next();
+                }
+
+                if matches!(input.get(pos + 1..pos + 4), Some(b"!--")) {
+                    let mut comment = Vec::new();
+                    let mut last_ch: u8 = 0;
+                    for (_, &ch) in iter.by_ref() {
+                        match ch {
+                            // "-->" terminates the comment.
+                            b'>' if comment.len() > 2
+                                && matches!(comment.last(), Some(b'-'))
+                                && matches!(comment.get(comment.len() - 2), Some(b'-')) =>
+                            {
+                                break;
+                            }
+                            b' ' | b'\t' | b'\r' | b'\n' => {
+                                // Collapse a run of whitespace into one space.
+                                if last_ch != b' ' {
+                                    comment.push(b' ');
+                                    last_ch = b' ';
+                                }
+                                continue;
+                            }
+                            _ => {
+                                comment.push(ch);
+                            }
+                        }
+                        last_ch = ch;
+                    }
+                    tags.push(HtmlToken::Comment {
+                        text: String::from_utf8(comment).unwrap_or_default(),
+                    });
+                } else {
+                    // Leading '/' (possibly surrounded by whitespace) marks an end tag.
+                    let mut is_end_tag = false;
+                    loop {
+                        match iter.peek() {
+                            Some((_, &b'/')) => {
+                                is_end_tag = true;
+                                pos += 1;
+                                iter.next();
+                            }
+                            Some((_, ch)) if ch.is_ascii_whitespace() => {
+                                pos += 1;
+                                iter.next();
+                            }
+                            _ => break,
+                        }
+                    }
+
+                    let mut in_quote = false;
+
+                    // Name currently being packed and the bit offset of its next byte.
+                    let mut key: u64 = 0;
+                    let mut shift = 0;
+
+                    let mut tag = 0;
+                    let mut attributes = vec![];
+
+                    'outer: while let Some((_, &ch)) = iter.next() {
+                        match ch {
+                            b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' if shift < 64 => {
+                                key |= (ch as u64) << shift;
+                                shift += 8;
+                            }
+                            b'A'..=b'Z' if shift < 64 => {
+                                key |= ((ch - b'A' + b'a') as u64) << shift;
+                                shift += 8;
+                            }
+                            b'>' if !in_quote => {
+                                if shift != 0 {
+                                    if tag == 0 {
+                                        tag = key;
+                                    } else {
+                                        attributes.push((key, None));
+                                    }
+                                }
+                                break;
+                            }
+                            b'"' => {
+                                in_quote = !in_quote;
+                            }
+                            b'=' if !in_quote => {
+                                while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace())
+                                {
+                                    iter.next();
+                                }
+
+                                // A name directly before '=' becomes the attribute
+                                // that receives the value; otherwise the value goes
+                                // to the most recently pushed attribute.
+                                if shift != 0 {
+                                    attributes.push((key, None));
+                                    key = 0;
+                                    shift = 0;
+                                }
+
+                                let mut value = vec![];
+
+                                for (_, &ch) in iter.by_ref() {
+                                    match ch {
+                                        b'>' if !in_quote => {
+                                            if !value.is_empty() {
+                                                // No attribute to attach to (e.g. `<a =x>`):
+                                                // drop the value instead of panicking.
+                                                if let Some((_, slot)) = attributes.last_mut() {
+                                                    *slot = Some(
+                                                        String::from_utf8(value)
+                                                            .unwrap_or_default(),
+                                                    );
+                                                }
+                                            }
+                                            break 'outer;
+                                        }
+                                        b'"' => {
+                                            if in_quote {
+                                                in_quote = false;
+                                                break;
+                                            } else {
+                                                in_quote = true;
+                                            }
+                                        }
+                                        b' ' | b'\t' | b'\r' | b'\n' if !in_quote => {
+                                            break;
+                                        }
+                                        _ => {
+                                            value.push(ch);
+                                        }
+                                    }
+                                }
+
+                                if !value.is_empty() {
+                                    // Same guard as above for values without a name.
+                                    if let Some((_, slot)) = attributes.last_mut() {
+                                        *slot =
+                                            Some(String::from_utf8(value).unwrap_or_default());
+                                    }
+                                }
+                            }
+                            b' ' | b'\t' | b'\r' | b'\n' => {
+                                // Whitespace ends the current name: the first one
+                                // is the tag, later ones are value-less attributes.
+                                if shift != 0 {
+                                    if tag == 0 {
+                                        tag = key;
+                                    } else {
+                                        attributes.push((key, None));
+                                    }
+                                    key = 0;
+                                    shift = 0;
+                                }
+                            }
+                            _ => {}
+                        }
+                    }
+
+                    if tag != 0 {
+                        if is_end_tag {
+                            tags.push(HtmlToken::EndTag { name: tag });
+                        } else {
+                            tags.push(HtmlToken::StartTag {
+                                name: tag,
+                                attributes,
+                            });
+                        }
+                    }
+                }
+                continue;
+            }
+            b' ' | b'\t' | b'\r' | b'\n' => {
+                if !is_token_start {
+                    add_html_token(
+                        &mut text,
+                        &input[token_start..token_end + 1],
+                        is_after_space && !is_new_line,
+                    );
+                    is_new_line = false;
+                }
+                is_after_space = true;
+                is_token_start = true;
+                continue;
+            }
+            b'&' if !is_token_start => {
+                // Flush the word before the entity; the '&' itself starts a new token below.
+                add_html_token(
+                    &mut text,
+                    &input[token_start..token_end + 1],
+                    is_after_space && !is_new_line,
+                );
+                is_new_line = false;
+                is_token_start = true;
+                is_after_space = false;
+            }
+            b';' if !is_token_start => {
+                // Include the ';' in the slice so a complete entity is handed over.
+                add_html_token(
+                    &mut text,
+                    &input[token_start..pos + 1],
+                    is_after_space && !is_new_line,
+                );
+                is_token_start = true;
+                is_after_space = false;
+                is_new_line = false;
+                continue;
+            }
+            _ => (),
+        }
+
+        if is_token_start {
+            token_start = pos;
+            is_token_start = false;
+        }
+        token_end = pos;
+    }
+
+    // Flush whatever is left once the input is exhausted.
+    if !is_token_start {
+        add_html_token(
+            &mut text,
+            &input[token_start..token_end + 1],
+            is_after_space && !is_new_line,
+        );
+    }
+    if !text.is_empty() {
+        tags.push(HtmlToken::Text { text });
+    }
+
+    tags
+}
+// Unit tests for `html_to_tokens`. The numeric `name`/attribute values are the
+// packed form of the lowercased ASCII name: one byte per character, first
+// character in the lowest byte (e.g. 7760228 == b'd' | b'i' << 8 | b'v' << 16).
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Plain text without markup yields a single text token.
+    #[test]
+    fn test_html_to_tokens_text() {
+        let input = "Hello, world!";
+        let tokens = html_to_tokens(input);
+        assert_eq!(
+            tokens,
+            vec![HtmlToken::Text {
+                text: "Hello, world!".to_string()
+            }]
+        );
+    }
+
+    #[test]
+    fn test_html_to_tokens_start_tag() {
+        let input = "<div>";
+        let tokens = html_to_tokens(input);
+        assert_eq!(
+            tokens,
+            vec![HtmlToken::StartTag {
+                name: 7760228, // "div"
+                attributes: vec![]
+            }]
+        );
+    }
+
+    #[test]
+    fn test_html_to_tokens_end_tag() {
+        let input = "</div>";
+        let tokens = html_to_tokens(input);
+        // 7760228 is the packed form of "div".
+        assert_eq!(tokens, vec![HtmlToken::EndTag { name: 7760228 }]);
+    }
+
+    // The comment text keeps its leading "!--" and trailing "--".
+    #[test]
+    fn test_html_to_tokens_comment() {
+        let input = "<!-- This is a comment -->";
+        let tokens = html_to_tokens(input);
+        assert_eq!(
+            tokens,
+            vec![HtmlToken::Comment {
+                text: "!-- This is a comment --".to_string()
+            }]
+        );
+    }
+
+    // Nested tags, entity decoding (`&quot;`) and whitespace handling.
+    #[test]
+    fn test_html_to_tokens_mixed() {
+        let input = "<div>Hello, <span>&quot; world &quot; </span>!</div>";
+        let tokens = html_to_tokens(input);
+        assert_eq!(
+            tokens,
+            vec![
+                HtmlToken::StartTag {
+                    name: 7760228, // "div"
+                    attributes: vec![]
+                },
+                HtmlToken::Text {
+                    text: "Hello,".to_string()
+                },
+                HtmlToken::StartTag {
+                    name: 1851879539, // "span"
+                    attributes: vec![]
+                },
+                HtmlToken::Text {
+                    text: " \" world \"".to_string()
+                },
+                // 1851879539 is the packed form of "span".
+                HtmlToken::EndTag { name: 1851879539 },
+                HtmlToken::Text {
+                    text: " !".to_string()
+                },
+                // 7760228 is the packed form of "div".
+                HtmlToken::EndTag { name: 7760228 }
+            ]
+        );
+    }
+
+    // Quoted and unquoted values, value-less attributes and self-closing syntax.
+    #[test]
+    fn test_html_to_tokens_with_attributes() {
+        let input = r#"<input type="text" value="test"><single/><one attr/><a b=1 b c="123">"#;
+        let tokens = html_to_tokens(input);
+        assert_eq!(
+            tokens,
+            vec![
+                HtmlToken::StartTag {
+                    name: 500186508905, // "input"
+                    attributes: vec![
+                        (1701869940, Some("text".to_string())), // "type"
+                        (435761734006, Some("test".to_string())) // "value"
+                    ]
+                },
+                HtmlToken::StartTag {
+                    name: 111516266162547, // "single"
+                    attributes: vec![]
+                },
+                HtmlToken::StartTag {
+                    name: 6647407, // "one"
+                    attributes: vec![(1920234593, None)] // "attr"
+                },
+                HtmlToken::StartTag {
+                    name: 97, // "a"
+                    attributes: vec![
+                        (98, Some("1".to_string())), // "b"
+                        (98, None), // "b"
+                        (99, Some("123".to_string())) // "c"
+                    ]
+                }
+            ]
+        );
+    }
+}
--- a/crates/spam-filter/src/modules/mod.rs
+++ b/crates/spam-filter/src/modules/mod.rs
@ -1 +1,4 @@
+pub mod dnsbl;
+pub mod html;
+pub mod remote_list;
 pub mod sanitize;
--- a/crates/spam-filter/src/modules/remote_list.rs
+++ b/crates/spam-filter/src/modules/remote_list.rs
@ -0,0 +1,199 @@
+use std::{
+    collections::HashSet,
+    io::{BufRead, BufReader},
+    time::Instant,
+};
+
+use common::{
+    config::{
+        scripts::RemoteList,
+        spamfilter::{RemoteListConfig, RemoteListFormat},
+    },
+    HttpLimitResponse, Server, USER_AGENT,
+};
+use mail_auth::flate2;
+
+/// Returns whether `item` is present in the remote list described by `config`.
+///
+/// The lookup itself is delegated to `is_in_remote_list_`. When that fails,
+/// the cached list for `config.id` is consulted (an empty one is created if
+/// missing):
+/// * if the cached list has not expired yet, the answer comes from it;
+/// * otherwise its expiration is pushed forward by `config.retry`, the error
+///   is logged with `span_id`, and `false` is returned.
+pub async fn is_in_remote_list(
+    server: &Server,
+    config: &RemoteListConfig,
+    item: &str,
+    span_id: u64,
+) -> bool {
+    let err = match is_in_remote_list_(server, config, item, span_id).await {
+        Ok(found) => return found,
+        Err(err) => err,
+    };
+
+    // Fall back to whatever is cached for this list.
+    let mut lists = server.inner.data.remote_lists.write();
+    let cached = lists
+        .entry(config.id.clone())
+        .or_insert_with(|| RemoteList {
+            entries: HashSet::new(),
+            expires: Instant::now(),
+        });
+
+    if cached.expires > Instant::now() {
+        return cached.entries.contains(item);
+    }
+
+    // Schedule the next attempt and report the failure.
+    cached.expires = Instant::now() + config.retry;
+    trc::error!(err.span_id(span_id));
+    false
+}
+
+/// Looks up `item` in the remote list described by `config`, downloading and
+/// caching the list when the cached copy is missing or expired.
+///
+/// * While the cached list has not expired, the answer is served from it
+///   without any network access.
+/// * Otherwise `config.url` is fetched (bounded by `config.timeout` and
+///   `config.max_size`), gunzipped when the URL ends in `.gz`, and parsed
+///   according to `config.format`. At most `config.max_entries` entries are
+///   kept and the list is marked valid for `config.refresh`.
+///
+/// # Errors
+///
+/// Returns an error when the request fails, the response status is not a
+/// success, the body exceeds `config.max_size`, or a line cannot be read.
+/// Error handling and retry scheduling are left to the caller.
+async fn is_in_remote_list_(
+    server: &Server,
+    config: &RemoteListConfig,
+    item: &str,
+    span_id: u64,
+) -> trc::Result<bool> {
+    // Test builds short-circuit on the URL/item names instead of fetching.
+    #[cfg(feature = "test_mode")]
+    {
+        if (config.url.contains("open") && item.contains("open"))
+            || (config.url.contains("tank") && item.contains("tank"))
+        {
+            return Ok(true);
+        }
+    }
+
+    // NOTE(review): unused binding kept from the original, presumably so the
+    // compiler warning acts as a reminder for the pending work.
+    let todo = "update RuntimeError with SpamEvent error";
+
+    // Serve from the cache while the list is still valid. The comparison must
+    // be `>`: a list is usable until its expiration lies in the past.
+    match server.inner.data.remote_lists.read().get(&config.id) {
+        Some(remote_list) if remote_list.expires > Instant::now() => {
+            return Ok(remote_list.entries.contains(item))
+        }
+        _ => {}
+    }
+
+    let response = reqwest::Client::builder()
+        .timeout(config.timeout)
+        .user_agent(USER_AGENT)
+        .build()
+        .unwrap_or_default()
+        .get(&config.url)
+        .send()
+        .await
+        .map_err(|err| {
+            trc::SieveEvent::RuntimeError
+                .into_err()
+                .reason(err)
+                .ctx(trc::Key::Url, config.url.to_string())
+                .details("Failed to build request")
+        })?;
+
+    if response.status().is_success() {
+        let bytes = response
+            .bytes_with_limit(config.max_size)
+            .await
+            .map_err(|err| {
+                trc::SieveEvent::RuntimeError
+                    .into_err()
+                    .reason(err)
+                    .ctx(trc::Key::Url, config.url.to_string())
+                    .details("Failed to fetch resource")
+            })?
+            .ok_or_else(|| {
+                trc::SieveEvent::RuntimeError
+                    .into_err()
+                    .ctx(trc::Key::Url, config.url.to_string())
+                    .details("Resource is too large")
+            })?;
+
+        let reader: Box<dyn std::io::Read> = if config.url.ends_with(".gz") {
+            Box::new(flate2::read::GzDecoder::new(&bytes[..]))
+        } else {
+            Box::new(&bytes[..])
+        };
+
+        // Lock remote list for writing
+        let mut _lock = server.inner.data.remote_lists.write();
+        let list = _lock
+            .entry(config.id.to_string())
+            .or_insert_with(|| RemoteList {
+                entries: HashSet::new(),
+                expires: Instant::now(),
+            });
+
+        // Make sure that the list is still expired (another task may have
+        // refreshed it while this one was downloading).
+        if list.expires > Instant::now() {
+            return Ok(list.entries.contains(item));
+        }
+
+        for (pos, line) in BufReader::new(reader).lines().enumerate() {
+            let line_ = line.map_err(|err| {
+                trc::SieveEvent::RuntimeError
+                    .into_err()
+                    .reason(err)
+                    .ctx(trc::Key::Url, config.url.to_string())
+                    .details("Failed to read line")
+            })?;
+            // Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
+            if pos == 0 {
+                list.entries.clear();
+            }
+
+            match &config.format {
+                RemoteListFormat::List => {
+                    let line = line_.trim();
+                    if !line.is_empty() {
+                        list.entries.insert(line.to_string());
+                    }
+                }
+                RemoteListFormat::Csv {
+                    column,
+                    separator,
+                    skip_first,
+                } if pos > 0 || !*skip_first => {
+                    // Extract the requested column; quotes toggle whether
+                    // separators are treated literally and are not copied.
+                    let mut in_quote = false;
+                    let mut col_num = 0;
+                    let mut entry = String::new();
+
+                    for ch in line_.chars() {
+                        if ch != '"' {
+                            if ch == *separator && !in_quote {
+                                if col_num == *column {
+                                    break;
+                                } else {
+                                    col_num += 1;
+                                }
+                            } else if col_num == *column {
+                                entry.push(ch);
+                                if entry.len() > config.max_entry_size {
+                                    break;
+                                }
+                            }
+                        } else {
+                            in_quote = !in_quote;
+                        }
+                    }
+
+                    if !entry.is_empty() {
+                        list.entries.insert(entry);
+                    }
+                }
+                _ => (),
+            }
+
+            // `>=` rather than `==` so the cap also holds if the limit is
+            // ever overshot (e.g. a limit of zero).
+            if list.entries.len() >= config.max_entries {
+                break;
+            }
+        }
+
+        trc::event!(
+            Spam(trc::SpamEvent::ListUpdated),
+            Url = config.url.to_string(),
+            Total = list.entries.len(),
+            SpanId = span_id
+        );
+
+        // Update expiration
+        list.expires = Instant::now() + config.refresh;
+        Ok(list.entries.contains(item))
+    } else {
+        trc::bail!(trc::SieveEvent::RuntimeError
+            .into_err()
+            .ctx(trc::Key::Code, response.status().as_u16())
+            .ctx(trc::Key::Url, config.url.to_string())
+            .details("Failed to fetch remote list"));
+    }
+}
--- a/crates/spam-filter/src/modules/sanitize.rs
+++ b/crates/spam-filter/src/modules/sanitize.rs
@ -4,7 +4,30 @@ use crate::{Email, Hostname};

 impl Hostname {
    pub fn new(host: &str) -> Self {
-        let fqdn = host.to_lowercase();
+        let mut fqdn = host.to_lowercase();
+
+        // Decode punycode
+        if fqdn.contains("xn--") {
+            let mut decoded = String::with_capacity(fqdn.len());
+
+            for part in fqdn.split('.') {
+                if !decoded.is_empty() {
+                    decoded.push('.');
+                }
+
+                if let Some(puny) = part
+                    .strip_prefix("xn--")
+                    .and_then(idna::punycode::decode_to_string)
+                {
+                    decoded.push_str(&puny);
+                } else {
+                    decoded.push_str(part);
+                }
+            }
+
+            fqdn = decoded;
+        }
+
        let ip = fqdn
            .strip_prefix('[')
            .and_then(|ip| ip.strip_suffix(']'))
@ -36,3 +59,9 @@ impl Email {
        }
    }
 }
+
+impl Hostname {
+    /// Returns the `sld` value when one is set, falling back to `fqdn` otherwise.
+    pub fn sld_or_default(&self) -> &str {
+        match self.sld.as_deref() {
+            Some(sld) => sld,
+            None => self.fqdn.as_str(),
+        }
+    }
+}
--- a/resources/config/spamfilter/scripts/bounce.sieve
+++ b/resources/config/spamfilter/scripts/bounce.sieve
@ -1,49 +0,0 @@
-
-if eval "(contains(subject_lc, 'delivery') && 
-            (contains(subject_lc, 'failed') || 
-             contains(subject_lc, 'report') || 
-             contains(subject_lc, 'status') || 
-             contains(subject_lc, 'warning'))) ||
-         (contains(subject_lc, 'failure') && 
-            (contains(subject_lc, 'delivery') || 
-             contains(subject_lc, 'notice') || 
-             contains(subject_lc, 'mail') )) ||
-         (contains(subject_lc, 'delivered') &&
-            (contains(subject_lc, 'couldn\\'t be') || 
-             contains(subject_lc, 'could not be') || 
-             contains(subject_lc, 'hasn\\'t been') || 
-             contains(subject_lc, 'has not been'))) ||
-         contains(subject_lc, 'returned mail') ||
-         contains(subject_lc, 'undeliverable') || 
-         contains(subject_lc, 'undelivered')" {
-    # Subject contains words or phrases typical for DSN
-    let "t.SUBJ_BOUNCE_WORDS" "1";
-}
-
-if eval "is_empty(envelope.from)" {
-    if eval "eq_ignore_case(header.content-type, 'multipart/report') && 
-             ( eq_ignore_case(header.content-type.attr.report-type, 'delivery-status') ||
-               eq_ignore_case(header.content-type.attr.report-type, 'disposition-notification'))" {
-        let "t.BOUNCE" "1";
-    } else {
-        let "from" "to_lowercase(header.from)";
-
-        if eval "contains(from, 'mdaemon') && !is_empty(header.X-MDDSN-Message)" {
-            let "t.BOUNCE" "1";
-        } elsif eval "contains(from, 'postmaster') || contains(from, 'mailer-daemon')" {
-            if eval "t.SUBJ_BOUNCE_WORDS" {
-                let "t.BOUNCE" "1";
-            } else {
-                foreverypart {
-                    if eval "(eq_ignore_case(header.content-type.type, 'message') ||
-                              eq_ignore_case(header.content-type.type, 'text')) &&
-                             (eq_ignore_case(header.content-type.subtype, 'rfc822-headers') ||
-                              eq_ignore_case(header.content-type.subtype, 'rfc822'))" {
-                        let "t.BOUNCE" "1";
-                        break;
-                    }
-                }
-            }
-        }
-    }
-}
--- a/resources/config/spamfilter/scripts/received.sieve
+++ b/resources/config/spamfilter/scripts/received.sieve
@ -1,93 +0,0 @@
-let "rcvd_raw" "header.received[*].raw";
-let "rcvd_count" "count(rcvd_raw)";
-
-# Count received headers
-if eval "rcvd_count == 0" {
-    let "t.RCVD_COUNT_ZERO" "1";
-} elsif eval "rcvd_count == 1" {
-    let "t.RCVD_COUNT_ONE" "1";
-} elsif eval "rcvd_count == 2" {
-    let "t.RCVD_COUNT_TWO" "1";
-} elsif eval "rcvd_count == 3" {
-    let "t.RCVD_COUNT_THREE" "1";
-} elsif eval "rcvd_count <= 5" {
-    let "t.RCVD_COUNT_FIVE" "1";
-} elsif eval "rcvd_count <= 7" {
-    let "t.RCVD_COUNT_SEVEN" "1";
-} elsif eval "rcvd_count <= 12" {
-    let "t.RCVD_COUNT_TWELVE" "1";
-}
-
-# Received from an authenticated user
-if eval "!is_empty(env.authenticated_as)" {
-    let "t.RCVD_VIA_SMTP_AUTH" "1";
-}
-
-# Received headers have non-ASCII characters
-if eval "!is_ascii(rcvd_raw)" {
-    let "t.RCVD_ILLEGAL_CHARS" "1";
-}
-
-let "i" "0";
-let "tls_count" "0";
-let "rcvd_from_ip" "0";
-while "i < rcvd_count" {
-    let "i" "i + 1";
-    let "helo_domain" "received_part(i, 'from')";
-
-    # Check for a forged received trail
-    if eval "!t.FORGED_RCVD_TRAIL" {
-        let "iprev" "received_part(i, 'iprev')";
-
-        if eval "!is_empty(iprev) && !is_empty(helo_domain) && !eq_ignore_case(helo_domain, iprev)" {
-            let "t.FORGED_RCVD_TRAIL" "1";
-        }
-    }
-
-    if eval "!t.PREVIOUSLY_DELIVERED" {
-        let "for" "received_part(i, 'for')";
-        # Recipient appears on Received trail
-        if eval "!is_empty(for) && contains_ignore_case(recipients, for)" {
-            let "t.PREVIOUSLY_DELIVERED" "1";
-        }
-    }
-
-    if eval "!t.RCVD_HELO_USER && eq_ignore_case(helo_domain, 'user')" {
-        # Received: HELO contains 'user'
-        let "t.RCVD_HELO_USER" "1";
-    }
-
-    if eval "!is_empty(received_part(i, 'from.ip'))" {
-        # Received from an IP address rather than a FQDN
-        let "rcvd_from_ip" "rcvd_from_ip + 1";
-    }
-
-    if eval "!is_empty(received_part(i, 'tls'))" {
-        # Received with TLS
-        let "tls_count" "tls_count + 1";
-    }
-}
-
-if eval "rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && is_ip_addr(env.helo_domain))" {
-    # Has two or more Received headers containing bare IP addresses
-    let "t.RCVD_DOUBLE_IP_SPAM" "1";
-}
-
-if eval "rcvd_count == 0" {
-    # One received header in a message (currently zero but one header will be added later by the MTA)
-    let "t.ONCE_RECEIVED" "1";
-    
-    # Message has been directly delivered from MUA to local MX
-    if eval "header.User-Agent.exists || header.X-Mailer.exists" {
-        let "t.DIRECT_TO_MX" "1";
-    }
-}
-
-# Received with TLS checks
-if eval "rcvd_count > 0 && tls_count == rcvd_count && !is_empty(env.tls.version)" {
-    let "t.RCVD_TLS_ALL" "1";
-} elsif eval "!is_empty(env.tls.version)" {
-    let "t.RCVD_TLS_LAST" "1";
-} else {
-    let "t.RCVD_NO_TLS_LAST" "1";
-}
--- a/resources/config/spamfilter/scripts/subject.sieve
+++ b/resources/config/spamfilter/scripts/subject.sieve
@ -1,78 +0,0 @@
-
-let "raw_subject_lc" "to_lowercase(header.subject.raw)";
-let "is_ascii_subject" "is_ascii(subject_lc)";
-
-if eval "len(subject_clean) >= 10 && count(tokenize(subject_clean, 'words')) > 1 && is_uppercase(subject_clean)" {
-    # Subject contains mostly capital letters
-	let "t.SUBJ_ALL_CAPS" "1";
-}
-
-if eval "count_chars(subject_clean) > 200" {
-    # Subject is very long
-    let "t.LONG_SUBJ" "1";
-}
-
-if eval "!is_empty(tokenize(subject_lc, 'uri_strict'))" {
-    # Subject contains a URL
-    let "t.URL_IN_SUBJECT" "1";
-}
-
-if eval "!is_ascii(raw_subject_lc) && !env.param.smtputf8 && env.param.body != '8bitmime' && env.param.body != 'binarymime'" {
-    # Subject needs encoding
-    let "t.SUBJECT_NEEDS_ENCODING" "1";
-}
-
-if eval "!header.Subject.exists" {
-    # Missing subject header
-    let "t.MISSING_SUBJECT" "1";
-} elsif eval "is_empty(trim(subject_lc))" {
-    # Subject is empty
-    let "t.EMPTY_SUBJECT" "1";
-}
-
-if eval "is_ascii(subject_lc) && contains(raw_subject_lc, '=?') && contains(raw_subject_lc, '?=')" {
-    if eval "contains(raw_subject_lc, '?q?')" {
-        # Subject header is unnecessarily encoded in quoted-printable
-        let "t.SUBJ_EXCESS_QP" "1";
-    } elsif eval "contains(raw_subject_lc, '?b?')" {
-        # Subject header is unnecessarily encoded in base64
-        let "t.SUBJ_EXCESS_BASE64" "1";
-    }
-}
-
-if eval "starts_with(subject_lc, 're:') && is_empty(header.in-reply-to) && is_empty(header.references)" {
-    # Fake reply
-    let "t.FAKE_REPLY" "1";
-}
-
-let "subject_lc_trim" "trim_end(subject_lc)";
-if eval "subject_lc != subject_lc_trim" {
-    # Subject ends with space characters
-    let "t.SUBJECT_ENDS_SPACES" "1";
-}
-
-if eval "contains(subject_lc, '$') || 
-         contains(subject_lc, '€') || 
-         contains(subject_lc, '£') || 
-         contains(subject_lc, '¥')" {
-    # Subject contains currency symbols
-    let "t.SUBJECT_HAS_CURRENCY" "1";
-}
-
-if eval "ends_with(subject_lc_trim, '!')" {
-    # Subject ends with an exclamation mark
-    let "t.SUBJECT_ENDS_EXCLAIM" "1";
-} elsif eval "ends_with(subject_lc_trim, '?')" {
-    # Subject ends with a question mark
-    let "t.SUBJECT_ENDS_QUESTION" "1";
-}
-
-if eval "contains(subject_lc_trim, '!')" {
-    # Subject contains an exclamation mark
-    let "t.SUBJECT_HAS_EXCLAIM" "1";
-}
-
-if eval "contains(subject_lc_trim, '?')" {
-    # Subject contains a question mark
-    let "t.SUBJECT_HAS_QUESTION" "1";
-}
--- a/resources/config/spamfilter/scripts/url.sieve
+++ b/resources/config/spamfilter/scripts/url.sieve
@ -1,125 +0,0 @@
-if eval "(count(body_urls) == 1 || count(html_body_urls) == 1) && count(tokenize(text_body, 'words')) == 0" {
-    let "t.URL_ONLY" "1";
-}
-
-if eval "has_zwsp(urls)" {
-    let "t.ZERO_WIDTH_SPACE_URL" "1";
-} elsif eval "has_obscured(urls)" {
-    let "t.R_SUSPICIOUS_URL" "1";
-}
-
-let "i" "count(urls)";
-while "i > 0" {
-    let "i" "i - 1";
-    let "url" "urls[i]";
-
-    # Skip non-URLs such as 'data:' and 'mailto:'
-    if eval "!contains(url, '://')" {
-        continue;
-    }
-
-    let "host" "uri_part(url, 'host')";
-
-    if eval "!is_empty(host)" {
-        let "is_ip" "is_ip_addr(host)";
-        let "host" "puny_decode(host)";
-        let "host_lc" "to_lowercase(host)";
-        let "host_sld" "domain_part(host_lc, 'sld')";
-
-        # Skip local and trusted domains
-        if eval "is_local_domain(DOMAIN_DIRECTORY, host_sld) || key_exists('spam-allow', host_sld)" {
-            continue;
-        }
-
-        if eval "!is_ip && 
-                 (!t.REDIRECTOR_URL || !t.URL_REDIRECTOR_NESTED) && 
-                 key_exists('spam-redirect', host_sld)" {
-            let "t.REDIRECTOR_URL" "1";
-            let "redir_count" "1";
-
-            while "redir_count <= 5" {
-                # Use a custom user-agent and a 3 second timeout
-                let "url_redirect" "http_header(url, 'Location', 'Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0', 3000)";
-                if eval "!is_empty(url_redirect)" {
-                    let "url" "url_redirect";
-                    let "host" "uri_part(url, 'host')";
-                    let "is_ip" "is_ip_addr(host)";
-                    let "host" "puny_decode(host)";
-                    let "host_lc" "to_lowercase(host)";
-                    let "host_sld" "domain_part(host_lc, 'sld')";
-
-                    if eval "!is_ip && key_exists('spam-redirect', host_sld)" {
-                        let "redir_count" "redir_count + 1";
-                    } else {
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-
-            if eval "redir_count > 5" {
-                let "t.URL_REDIRECTOR_NESTED" "1";
-            }
-        }
-
-        let "url_lc" "to_lowercase(url)";
-        let "query" "uri_part(url_lc, 'path_query')";
-        if eval "!is_ip" {
-            if eval "!is_ascii(host)" {
-                let "host_cured" "cure_text(host)";
-                if eval "host_lc != host_cured && dns_exists(host_cured, 'ip')" {
-                    let "t.HOMOGRAPH_URL" "1";
-                }
-
-                if eval "!is_single_script(host)" {
-                    let "t.MIXED_CHARSET_URL" "1";
-                }
-            } else {
-                if eval "ends_with(host, 'googleusercontent.com') && starts_with(query, '/proxy/')" {
-                    let "t.HAS_GUC_PROXY_URI" "1";
-                } elsif eval "ends_with(host, 'firebasestorage.googleapis.com')" {
-                    let "t.HAS_GOOGLE_FIREBASE_URL" "1";
-                } elsif eval "starts_with(domain_part(host, 'sld'), 'google.') && contains(query, 'url?') " {
-                    let "t.HAS_GOOGLE_REDIR" "1";
-                }
-            }
-
-            if eval "(contains(host_lc, 'ipfs.') || contains(query, '/ipfs')) && contains(query, '/qm')" {
-                # InterPlanetary File System (IPFS) gateway URL, likely malicious
-                let "t.HAS_IPFS_GATEWAY_URL" "1";
-            } elsif eval "ends_with(host_lc, '.onion')" {
-                let "t.HAS_ONION_URI" "1";
-            }
-        } else {
-            # URL is an ip address
-            let "t.R_SUSPICIOUS_URL" "1";
-        }
-
-        if eval "starts_with(query, '/wp-')" {
-            # Contains WordPress URIs
-            let "t.HAS_WP_URI" "1";
-            if eval "starts_with(query, '/wp-content') | starts_with(query, '/wp-includes')" {
-                # URL that is pointing to a compromised WordPress installation
-                let "t.WP_COMPROMISED" "1";
-            }
-        }
-        if eval "contains(query, '/../') && !contains(query, '/well-known') && !contains(query, '/well_known')" {
-            # Message contains URI with a hidden path
-            let "t.URI_HIDDEN_PATH" "1";
-        }
-
-        # Phishing checks (refresh OpenPhish every 12 hours, PhishTank every 6 hours)
-        if eval "key_exists_http('https://openphish.com/feed.txt', url, [43200, 'list'])" {
-            let "t.PHISHED_OPENPHISH" "1";
-        }
-        if eval "key_exists_http('http://data.phishtank.com/data/online-valid.csv', url, [21600, 'csv', 1, ',', true])" {
-            let "t.PHISHED_PHISHTANK" "1";
-        }
-
-    } else {
-        # URL could not be parsed
-        let "t.R_SUSPICIOUS_URL" "1";
-    }
-}
-