Bayes classifier

mdecimus 2023-10-11 19:21:11 +02:00
parent 3d9efd363a
commit ace58f74eb
41 changed files with 6737 additions and 934 deletions

Cargo.lock (generated)

@@ -93,9 +93,9 @@ dependencies = [
[[package]]
name = "aho-corasick"
version = "1.1.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
@@ -2676,15 +2676,6 @@ version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linkify"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1dfa36d52c581e9ec783a7ce2a5e0143da6237be5811a0b3153fedfdbe9f780"
dependencies = [
"memchr",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.10"
@@ -2994,11 +2985,16 @@ dependencies = [
"farmhash",
"jieba-rs",
"lazy_static",
"lru-cache",
"nohash",
"parking_lot",
"phf",
"rust-stemmers",
"serde",
"siphasher 1.0.0",
"tinysegmenter",
"tokio",
"utils",
"whatlang",
"xxhash-rust",
]
@@ -3294,9 +3290,9 @@ dependencies = [
[[package]]
name = "ordered-float"
version = "3.9.1"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a54938017eacd63036332b4ae5c8a49fc8c0c1d6d629893057e4f13609edd06"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
]
@@ -3630,9 +3626,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.68"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c"
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
dependencies = [
"unicode-ident",
]
@@ -3799,9 +3795,9 @@ dependencies = [
[[package]]
name = "rasn"
version = "0.10.1"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4addd1a49756bcb131c2f686c6c833d2b63e4da7a0df07efd8c3de04b7efbdb2"
checksum = "c22b7f7ff0508dae62e1be69fe02f32eb88523090b50ac850637947853cf5b6d"
dependencies = [
"arrayvec",
"bitvec",
@@ -3821,9 +3817,9 @@ dependencies = [
[[package]]
name = "rasn-cms"
version = "0.10.1"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e269b4df6eea0f54abd46afacd759b1c13a27e98da98a47ef3c405ef3568b0f5"
checksum = "6ecf9f1bb38cbb2a032014f0329d7fd9c2b08f26c4fc882ad642bb95dfefd74f"
dependencies = [
"rasn",
"rasn-pkix",
@@ -3831,9 +3827,9 @@ dependencies = [
[[package]]
name = "rasn-derive"
version = "0.10.1"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba8242a16e3461b81333516ad8457906f52fdf21d087417fb59262c9ab406618"
checksum = "a1e6ddbc9ada563036d59c322cb0886a9b08b346904eebbcd20af2e01caecee7"
dependencies = [
"either",
"itertools 0.10.5",
@@ -3846,9 +3842,9 @@ dependencies = [
[[package]]
name = "rasn-pkix"
version = "0.10.1"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06179c947a63fe9f9f5d73a539dcb13d90c6bdaeb03bd28b90ad796aff9fe6a8"
checksum = "b894c903130c4915d79d8d9ce155429b3896b25efa5f81de4d9ab7b1b0f0b7cf"
dependencies = [
"rasn",
]
@@ -3904,14 +3900,14 @@ dependencies = [
[[package]]
name = "regex"
version = "1.9.6"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff"
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.3.9",
"regex-syntax 0.7.5",
"regex-automata 0.4.1",
"regex-syntax 0.8.0",
]
[[package]]
@@ -3925,13 +3921,13 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.3.9"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.7.5",
"regex-syntax 0.8.0",
]
[[package]]
@@ -3942,9 +3938,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.7.5"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
checksum = "c3cbb081b9784b07cceb8824c8583f86db4814d172ab043f3c23f7dc600bf83d"
[[package]]
name = "reqwest"
@@ -4610,7 +4606,7 @@ checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "sieve-rs"
version = "0.3.1"
source = "git+https://github.com/stalwartlabs/sieve#c9288b62815610872e9f278b904e34d46124acb5"
source = "git+https://github.com/stalwartlabs/sieve#bbb265765ebe92394e429001e90ba2e9b4201f9a"
dependencies = [
"ahash 0.8.3",
"bincode",
@@ -4690,13 +4686,13 @@ dependencies = [
"imagesize",
"infer",
"lazy_static",
"linkify",
"lru-cache",
"mail-auth",
"mail-builder",
"mail-parser",
"mail-send",
"md5",
"nlp",
"num_cpus",
"parking_lot",
"rand 0.8.5",


@@ -23,7 +23,7 @@
use mail_send::Credentials;
use crate::{Directory, Principal, QueryColumn};
use crate::{DatabaseColumn, Directory, Principal};
use super::CachedDirectory;
@@ -71,11 +71,15 @@ impl<T: Directory> Directory for CachedDirectory<T> {
self.inner.expn(address).await
}
async fn lookup(&self, query: &str, params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, query: &str, params: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
self.inner.lookup(query, params).await
}
async fn query(&self, query: &str, params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
self.inner.query(query, params).await
}


@@ -24,7 +24,7 @@
use mail_send::Credentials;
use smtp_proto::{AUTH_CRAM_MD5, AUTH_LOGIN, AUTH_OAUTHBEARER, AUTH_PLAIN, AUTH_XOAUTH2};
use crate::{Directory, DirectoryError, Principal, QueryColumn};
use crate::{DatabaseColumn, Directory, DirectoryError, Principal};
use super::{ImapDirectory, ImapError};
@@ -98,11 +98,15 @@ impl Directory for ImapDirectory {
Err(DirectoryError::unsupported("imap", "expn"))
}
async fn lookup(&self, _query: &str, _params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, _: &str, _: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
Err(DirectoryError::unsupported("imap", "lookup"))
}
async fn query(&self, _query: &str, _params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
_: &str,
_: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
Err(DirectoryError::unsupported("imap", "query"))
}


@@ -24,7 +24,7 @@
use ldap3::{ResultEntry, Scope, SearchEntry};
use mail_send::Credentials;
use crate::{Directory, Principal, QueryColumn, Type};
use crate::{DatabaseColumn, Directory, Principal, Type};
use super::{LdapDirectory, LdapMappings};
@@ -239,13 +239,17 @@ impl Directory for LdapDirectory {
Ok(emails)
}
async fn lookup(&self, query: &str, params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, query: &str, params: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
self.query_(query, params)
.await
.map(|entry| entry.is_some())
}
async fn query(&self, query: &str, params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
self.query_(query, params).await.map(|entry| {
if let Some(entry) = entry {
let mut object = String::new();
@@ -257,7 +261,7 @@ impl Directory for LdapDirectory {
object.push('\n');
}
}
vec![QueryColumn::Text(object)]
vec![DatabaseColumn::Text(object.into())]
} else {
vec![]
}
@@ -283,7 +287,11 @@ impl Directory for LdapDirectory {
}
impl LdapDirectory {
async fn query_(&self, query: &str, params: &[&str]) -> crate::Result<Option<ResultEntry>> {
async fn query_(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> crate::Result<Option<ResultEntry>> {
let mut conn = self.pool.get().await?;
tracing::trace!(context = "directory", event = "query", query = query, params = ?params);
@@ -292,7 +300,7 @@ impl LdapDirectory {
for (pos, item) in query.split('?').enumerate() {
if pos > 0 {
if let Some(param) = params.get(pos - 1) {
expanded_query.push_str(param);
expanded_query.push_str(param.as_str());
}
}
expanded_query.push_str(item);


@@ -21,7 +21,11 @@
* for more details.
*/
use std::{borrow::Cow, fmt::Debug, sync::Arc};
use std::{
borrow::Cow,
fmt::{Debug, Display},
sync::Arc,
};
use ahash::{AHashMap, AHashSet};
use bb8::RunError;
@@ -82,8 +86,12 @@ pub trait Directory: Sync + Send {
async fn rcpt(&self, address: &str) -> crate::Result<bool>;
async fn vrfy(&self, address: &str) -> Result<Vec<String>>;
async fn expn(&self, address: &str) -> Result<Vec<String>>;
async fn lookup(&self, query: &str, params: &[&str]) -> Result<bool>;
async fn query(&self, query: &str, params: &[&str]) -> Result<Vec<QueryColumn>>;
async fn lookup(&self, query: &str, params: &[DatabaseColumn<'_>]) -> Result<bool>;
async fn query(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> Result<Vec<DatabaseColumn<'static>>>;
fn type_name(&self) -> &'static str {
std::any::type_name::<Self>()
@@ -91,12 +99,12 @@ pub trait Directory: Sync + Send {
}
#[derive(Clone, Debug)]
pub enum QueryColumn {
pub enum DatabaseColumn<'x> {
Integer(i64),
Bool(bool),
Float(f64),
Text(String),
Blob(Vec<u8>),
Text(Cow<'x, str>),
Blob(Cow<'x, [u8]>),
Null,
}
@@ -169,24 +177,24 @@ impl PartialEq for MatchType {
impl Eq for MatchType {}
impl Lookup {
pub async fn contains(&self, item: &str) -> Option<bool> {
pub async fn contains(&self, item: impl Into<DatabaseColumn<'_>>) -> Option<bool> {
match self {
Lookup::Directory { directory, query } => {
match directory.lookup(query, &[item]).await {
match directory.lookup(query, &[item.into()]).await {
Ok(result) => result.into(),
Err(_) => None,
}
}
Lookup::List { list } => list.contains(item).into(),
Lookup::Map { map } => map.contains_key(item).into(),
Lookup::List { list } => list.contains(item.into().as_str()).into(),
Lookup::Map { map } => map.contains_key(item.into().as_str()).into(),
}
}
pub async fn lookup(&self, item: &str) -> Option<Variable<'static>> {
pub async fn lookup(&self, items: &[DatabaseColumn<'_>]) -> Option<Variable<'static>> {
match self {
Lookup::Directory { directory, query } => match directory.query(query, &[item]).await {
Lookup::Directory { directory, query } => match directory.query(query, items).await {
Ok(mut result) => match result.len() {
1 if !matches!(result.first(), Some(QueryColumn::Null)) => {
1 if !matches!(result.first(), Some(DatabaseColumn::Null)) => {
result.pop().map(Variable::from).unwrap()
}
0 => Variable::default(),
@@ -195,21 +203,34 @@ impl Lookup {
.into(),
Err(_) => None,
},
Lookup::List { list } => Some(list.contains(item).into()),
Lookup::Map { map } => map.get(item).cloned(),
Lookup::List { list } => Some(list.contains(items[0].as_str()).into()),
Lookup::Map { map } => map.get(items[0].as_str()).cloned(),
}
}
pub async fn query(
&self,
items: &[DatabaseColumn<'_>],
) -> Option<Vec<DatabaseColumn<'static>>> {
match self {
Lookup::Directory { directory, query } => match directory.query(query, items).await {
Ok(result) => Some(result),
Err(_) => None,
},
_ => None,
}
}
}
impl From<QueryColumn> for Variable<'static> {
fn from(value: QueryColumn) -> Self {
impl<'x> From<DatabaseColumn<'x>> for Variable<'static> {
fn from(value: DatabaseColumn) -> Self {
match value {
QueryColumn::Integer(v) => Variable::Integer(v),
QueryColumn::Bool(v) => Variable::Integer(i64::from(v)),
QueryColumn::Float(v) => Variable::Float(v),
QueryColumn::Text(v) => Variable::String(v),
QueryColumn::Blob(v) => Variable::String(v.into_string()),
QueryColumn::Null => Variable::StringRef(""),
DatabaseColumn::Integer(v) => Variable::Integer(v),
DatabaseColumn::Bool(v) => Variable::Integer(i64::from(v)),
DatabaseColumn::Float(v) => Variable::Float(v),
DatabaseColumn::Text(v) => Variable::String(v.into_owned()),
DatabaseColumn::Blob(v) => Variable::String(v.into_owned().into_string()),
DatabaseColumn::Null => Variable::StringRef(""),
}
}
}
@@ -457,3 +478,115 @@ impl AddressMapping {
}
}
}
impl<'x> DatabaseColumn<'x> {
pub fn as_str(&self) -> &str {
match self {
Self::Text(v) => v.as_ref(),
_ => "",
}
}
}
impl<'x> From<&'x str> for DatabaseColumn<'x> {
fn from(value: &'x str) -> Self {
Self::Text(value.into())
}
}
impl<'x> From<String> for DatabaseColumn<'x> {
fn from(value: String) -> Self {
Self::Text(value.into())
}
}
impl<'x> From<&'x String> for DatabaseColumn<'x> {
fn from(value: &'x String) -> Self {
Self::Text(value.into())
}
}
impl<'x> From<Cow<'x, str>> for DatabaseColumn<'x> {
fn from(value: Cow<'x, str>) -> Self {
Self::Text(value)
}
}
impl<'x> From<bool> for DatabaseColumn<'x> {
fn from(value: bool) -> Self {
Self::Bool(value)
}
}
impl<'x> From<i64> for DatabaseColumn<'x> {
fn from(value: i64) -> Self {
Self::Integer(value)
}
}
impl<'x> From<u64> for DatabaseColumn<'x> {
fn from(value: u64) -> Self {
Self::Integer(value as i64)
}
}
impl<'x> From<u32> for DatabaseColumn<'x> {
fn from(value: u32) -> Self {
Self::Integer(value as i64)
}
}
impl<'x> From<f64> for DatabaseColumn<'x> {
fn from(value: f64) -> Self {
Self::Float(value)
}
}
impl<'x> From<&'x [u8]> for DatabaseColumn<'x> {
fn from(value: &'x [u8]) -> Self {
Self::Blob(value.into())
}
}
impl<'x> From<Vec<u8>> for DatabaseColumn<'x> {
fn from(value: Vec<u8>) -> Self {
Self::Blob(value.into())
}
}
impl<'x> From<Variable<'x>> for DatabaseColumn<'x> {
fn from(value: Variable<'x>) -> Self {
match value {
Variable::String(v) => Self::Text(v.into()),
Variable::StringRef(v) => Self::Text(v.into()),
Variable::Integer(v) => Self::Integer(v),
Variable::Float(v) => Self::Float(v),
v => Self::Text(v.into_string().into()),
}
}
}
impl<'x> From<&'x Variable<'x>> for DatabaseColumn<'x> {
fn from(value: &'x Variable<'x>) -> Self {
match value {
Variable::String(v) => Self::Text(v.into()),
Variable::StringRef(v) => Self::Text((*v).into()),
Variable::Integer(v) => Self::Integer(*v),
Variable::Float(v) => Self::Float(*v),
v => Self::Text(v.to_string().into()),
}
}
}
impl<'x> Display for DatabaseColumn<'x> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DatabaseColumn::Text(v) => f.write_str(v.as_ref()),
DatabaseColumn::Integer(v) => write!(f, "{}", v),
DatabaseColumn::Bool(v) => write!(f, "{}", v),
DatabaseColumn::Float(v) => write!(f, "{}", v),
DatabaseColumn::Blob(v) => write!(f, "{}", String::from_utf8_lossy(v.as_ref())),
DatabaseColumn::Null => write!(f, "NULL"),
}
}
}
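Editorial sketch (not part of this commit): how a caller builds parameters for the reworked trait. Here `directory` stands in for any `impl Directory`, and the query text is illustrative.

let params = [
    DatabaseColumn::from("jdoe"), // borrows the &str (Cow::Borrowed), no allocation
    DatabaseColumn::from(42i64),  // integer parameter
];
let exists = directory
    .lookup("SELECT 1 FROM accounts WHERE name = ? AND id = ?", &params)
    .await?;

Owned values (String, Vec<u8>) convert the same way as Cow::Owned, which is what lets query() return self-contained DatabaseColumn<'static> rows.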


@@ -23,7 +23,7 @@
use mail_send::Credentials;
use crate::{Directory, DirectoryError, Principal, QueryColumn};
use crate::{DatabaseColumn, Directory, DirectoryError, Principal};
use super::{EmailType, MemoryDirectory};
@@ -132,11 +132,15 @@ impl Directory for MemoryDirectory {
Ok(result)
}
async fn lookup(&self, _query: &str, _params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, _: &str, _: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
Err(DirectoryError::unsupported("memory", "lookp"))
}
async fn query(&self, _query: &str, _params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
_: &str,
_: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
Err(DirectoryError::unsupported("memory", "query"))
}


@@ -24,7 +24,7 @@
use mail_send::{smtp::AssertReply, Credentials};
use smtp_proto::Severity;
use crate::{Directory, DirectoryError, Principal, QueryColumn};
use crate::{DatabaseColumn, Directory, DirectoryError, Principal};
use super::{SmtpClient, SmtpDirectory};
@@ -93,11 +93,15 @@ impl Directory for SmtpDirectory {
.await
}
async fn lookup(&self, _query: &str, _params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, _: &str, _: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
Err(DirectoryError::unsupported("smtp", "lookup"))
}
async fn query(&self, _query: &str, _params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
_: &str,
_: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
Err(DirectoryError::unsupported("smtp", "query"))
}


@@ -25,7 +25,7 @@ use futures::TryStreamExt;
use mail_send::Credentials;
use sqlx::{any::AnyRow, postgres::any::AnyTypeInfoKind, Column, Row};
use crate::{Directory, Principal, QueryColumn, Type};
use crate::{DatabaseColumn, Directory, Principal, Type};
use super::{SqlDirectory, SqlMappings};
@@ -154,35 +154,39 @@ impl Directory for SqlDirectory {
.map_err(Into::into)
}
async fn lookup(&self, query: &str, params: &[&str]) -> crate::Result<bool> {
async fn lookup(&self, query: &str, params: &[DatabaseColumn<'_>]) -> crate::Result<bool> {
self.query_(query, params).await.map(|row| row.is_some())
}
async fn query(&self, query: &str, params: &[&str]) -> crate::Result<Vec<QueryColumn>> {
async fn query(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> crate::Result<Vec<DatabaseColumn<'static>>> {
self.query_(query, params).await.map(|row| {
if let Some(row) = row {
let mut columns = Vec::with_capacity(row.columns().len());
for col in row.columns() {
let idx = col.ordinal();
columns.push(match col.type_info().kind() {
AnyTypeInfoKind::Null => QueryColumn::Null,
AnyTypeInfoKind::Null => DatabaseColumn::Null,
AnyTypeInfoKind::Bool => {
QueryColumn::Bool(row.try_get(idx).unwrap_or_default())
DatabaseColumn::Bool(row.try_get(idx).unwrap_or_default())
}
AnyTypeInfoKind::SmallInt
| AnyTypeInfoKind::Integer
| AnyTypeInfoKind::BigInt => {
QueryColumn::Integer(row.try_get(idx).unwrap_or_default())
DatabaseColumn::Integer(row.try_get(idx).unwrap_or_default())
}
AnyTypeInfoKind::Real | AnyTypeInfoKind::Double => {
QueryColumn::Float(row.try_get(idx).unwrap_or_default())
}
AnyTypeInfoKind::Text => {
QueryColumn::Text(row.try_get(idx).unwrap_or_default())
}
AnyTypeInfoKind::Blob => {
QueryColumn::Blob(row.try_get(idx).unwrap_or_default())
DatabaseColumn::Float(row.try_get(idx).unwrap_or_default())
}
AnyTypeInfoKind::Text => DatabaseColumn::Text(
row.try_get::<String, _>(idx).unwrap_or_default().into(),
),
AnyTypeInfoKind::Blob => DatabaseColumn::Blob(
row.try_get::<Vec<u8>, _>(idx).unwrap_or_default().into(),
),
});
}
columns
@@ -204,11 +208,24 @@ impl Directory for SqlDirectory {
}
impl SqlDirectory {
async fn query_(&self, query: &str, params: &[&str]) -> crate::Result<Option<AnyRow>> {
async fn query_(
&self,
query: &str,
params: &[DatabaseColumn<'_>],
) -> crate::Result<Option<AnyRow>> {
tracing::trace!(context = "directory", event = "query", query = query, params = ?params);
let mut q = sqlx::query(query);
for param in params {
q = q.bind(param);
q = match param {
DatabaseColumn::Text(v) => q.bind(v.as_ref()),
DatabaseColumn::Integer(v) => q.bind(v),
DatabaseColumn::Bool(v) => q.bind(v),
DatabaseColumn::Float(v) => q.bind(v),
DatabaseColumn::Blob(v) => {
q.bind(std::str::from_utf8(v.as_ref()).unwrap_or_default())
}
DatabaseColumn::Null => q.bind(""),
}
}
q.fetch(&self.pool).try_next().await.map_err(Into::into)


@@ -37,8 +37,8 @@ p256 = { version = "0.13", features = ["ecdh"] }
hkdf = "0.12.3"
sha2 = "0.10.1"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls-webpki-roots"]}
tokio-tungstenite = "0.20.0"
tungstenite = "0.20.0"
tokio-tungstenite = "0.20"
tungstenite = "0.20"
chrono = "0.4"
dashmap = "5.4"
aes = "0.8.3"


@@ -5,6 +5,7 @@ edition = "2021"
resolver = "2"
[dependencies]
utils = { path = "../utils" }
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
farmhash = "1.1.5"
siphasher = "1.0"
@@ -17,3 +18,12 @@ whatlang = "0.16" # Language detection
rust-stemmers = "1.2" # Stemmers
tinysegmenter = "0.1" # Japanese tokenizer
jieba-rs = "0.6" # Chinese stemmer
phf = { version = "0.11", features = ["macros"] }
lru-cache = "0.1.2"
parking_lot = "0.12.1"
[features]
test_mode = []
[dev-dependencies]
tokio = { version = "1.23", features = ["full"] }


@@ -1,77 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use nohash::IsEnabled;
use crate::transformers::osb::{Gram, OsbToken};
use super::TokenHash;
pub struct BloomHasher<'x, T: Iterator<Item = OsbToken<Gram<'x>>>> {
buf: Vec<u8>,
tokens: T,
}
impl<'x, T: Iterator<Item = OsbToken<Gram<'x>>>> BloomHasher<'x, T> {
pub fn new(tokens: T) -> Self {
Self {
buf: Vec::with_capacity(64),
tokens,
}
}
}
impl<'x, T: Iterator<Item = OsbToken<Gram<'x>>>> Iterator for BloomHasher<'x, T> {
type Item = OsbToken<TokenHash>;
fn next(&mut self) -> Option<Self::Item> {
self.tokens.next().map(|token| {
let bytes = match token.inner {
Gram::Uni { t1 } => t1.as_bytes(),
Gram::Bi { t1, t2, .. } => {
self.buf.clear();
self.buf.extend_from_slice(t1.as_bytes());
self.buf.push(b' ');
self.buf.extend_from_slice(t2.as_bytes());
&self.buf
}
};
OsbToken {
inner: TokenHash {
h1: xxhash_rust::xxh3::xxh3_64(bytes),
h2: farmhash::hash64(bytes),
},
idx: token.idx,
}
})
}
}
impl std::hash::Hash for TokenHash {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.h1 ^ self.h2);
}
}
impl IsEnabled for TokenHash {}


@@ -0,0 +1,107 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{
hash::BuildHasherDefault,
time::{Duration, Instant},
};
use lru_cache::LruCache;
use nohash::NoHashHasher;
use parking_lot::Mutex;
use super::{TokenHash, Weights};
#[derive(Debug)]
pub struct BayesTokenCache {
positive: Mutex<LruCache<TokenHash, CacheItem, BuildHasherDefault<NoHashHasher<TokenHash>>>>,
negative: Mutex<LruCache<TokenHash, Instant, BuildHasherDefault<NoHashHasher<TokenHash>>>>,
ttl_negative: Duration,
ttl_positive: Duration,
}
#[derive(Debug, Clone)]
pub struct CacheItem {
item: Weights,
valid_until: Instant,
}
impl BayesTokenCache {
pub fn new(capacity: usize, ttl_positive: Duration, ttl_negative: Duration) -> Self {
Self {
positive: Mutex::new(LruCache::with_hasher(capacity, Default::default())),
negative: Mutex::new(LruCache::with_hasher(capacity, Default::default())),
ttl_negative,
ttl_positive,
}
}
pub fn get(&self, hash: &TokenHash) -> Option<Option<Weights>> {
{
let mut pos_cache = self.positive.lock();
if let Some(entry) = pos_cache.get_mut(hash) {
return if entry.valid_until >= Instant::now() {
Some(Some(entry.item))
} else {
pos_cache.remove(hash);
None
};
}
}
{
let mut neg_cache = self.negative.lock();
if let Some(entry) = neg_cache.get_mut(hash) {
return if *entry >= Instant::now() {
Some(None)
} else {
neg_cache.remove(hash);
None
};
}
}
None
}
pub fn insert_positive(&self, hash: TokenHash, weights: Weights) {
self.positive.lock().insert(
hash,
CacheItem {
item: weights,
valid_until: Instant::now() + self.ttl_positive,
},
);
}
pub fn insert_negative(&self, hash: TokenHash) {
self.negative
.lock()
.insert(hash, Instant::now() + self.ttl_negative);
}
pub fn invalidate(&self, hash: &TokenHash) {
if self.positive.lock().remove(hash).is_none() {
self.negative.lock().remove(hash);
}
}
}
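Editorial sketch (not part of this commit): the Option<Option<Weights>> returned by get encodes three distinct states. The `hash` binding below is illustrative.

let cache = BayesTokenCache::new(8192, Duration::from_secs(3600), Duration::from_secs(3600));
match cache.get(&hash) {
    Some(Some(weights)) => { /* fresh positive entry: token weights are known */ }
    Some(None) => { /* fresh negative entry: token known to be absent from the store */ }
    None => {
        // Not cached (or entry expired): fetch from the backing store, then call
        // cache.insert_positive(hash, weights) or cache.insert_negative(hash).
    }
}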


@@ -21,13 +21,14 @@
* for more details.
*/
use crate::transformers::osb::OsbToken;
use crate::tokenizers::osb::OsbToken;
use super::{BayesClassifier, Weights};
// Position 0 represents Unigram weights
const FEATURE_WEIGHT: [f64; 8] = [1.0, 3125.0, 256.0, 27.0, 1.0, 0.0, 0.0, 0.0];
// Credits: ported from RSpamd
impl BayesClassifier {
pub fn classify<T>(&self, tokens: T, ham_learns: u32, spam_learns: u32) -> Option<f64>
where

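Editorial note (not part of this commit): FEATURE_WEIGHT appears to be indexed by OsbToken::idx, mirroring RSpamd's OSB feature weights — the unigram at idx 0 contributes with weight 1.0, and the skip-bigrams at window distances 1..=4 are weighted 3125.0, 256.0, 27.0 and 1.0.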

@@ -26,8 +26,11 @@ use std::{collections::HashMap, hash::BuildHasherDefault};
use nohash::NoHashHasher;
use serde::{Deserialize, Serialize};
pub mod bloom;
use crate::tokenizers::osb::Gram;
pub mod cache;
pub mod classify;
pub mod tokenize;
pub mod train;
#[derive(Debug, Serialize, Deserialize, Default)]
@@ -37,7 +40,7 @@ pub struct BayesModel {
pub ham_learns: u32,
}
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BayesClassifier {
pub min_token_hits: u32,
pub min_tokens: u32,
@@ -47,14 +50,14 @@ pub struct BayesClassifier {
#[derive(Debug, Serialize, Deserialize, Default, Copy, Clone, PartialEq, Eq)]
pub struct TokenHash {
h1: u64,
h2: u64,
pub h1: u64,
pub h2: u64,
}
#[derive(Debug, Serialize, Deserialize, Default, Copy, Clone)]
pub struct Weights {
spam: u32,
ham: u32,
pub spam: u32,
pub ham: u32,
}
impl BayesClassifier {
@@ -73,3 +76,32 @@ impl Default for BayesClassifier {
Self::new()
}
}
impl From<Gram<'_>> for TokenHash {
fn from(value: Gram<'_>) -> Self {
match value {
Gram::Uni { t1 } => TokenHash {
h1: xxhash_rust::xxh3::xxh3_64(t1.as_bytes()),
h2: farmhash::hash64(t1.as_bytes()),
},
Gram::Bi { t1, t2, .. } => {
let mut buf = Vec::with_capacity(t1.len() + t2.len() + 1);
buf.extend_from_slice(t1.as_bytes());
buf.push(b' ');
buf.extend_from_slice(t2.as_bytes());
TokenHash {
h1: xxhash_rust::xxh3::xxh3_64(&buf),
h2: farmhash::hash64(&buf),
}
}
}
}
}
impl std::hash::Hash for TokenHash {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.h1 ^ self.h2);
}
}
impl nohash::IsEnabled for TokenHash {}
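Editorial sketch (not part of this commit): pre-mixing the two 64-bit digests is what allows the weights map to use nohash's pass-through hasher, as BayesModel does. Values below are illustrative.

use std::{collections::HashMap, hash::BuildHasherDefault};
use nohash::NoHashHasher;

let mut weights: HashMap<TokenHash, Weights, BuildHasherDefault<NoHashHasher<TokenHash>>> =
    HashMap::default();
weights.insert(Gram::Uni { t1: "offer" }.into(), Weights { spam: 3, ham: 1 });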

File diff suppressed because it is too large


@@ -21,7 +21,7 @@
* for more details.
*/
use crate::transformers::osb::OsbToken;
use crate::tokenizers::osb::OsbToken;
use super::{BayesModel, TokenHash};


@@ -21,6 +21,10 @@
* for more details.
*/
pub mod detect;
pub mod stemmer;
pub mod stopwords;
use std::borrow::Cow;
use crate::tokenizers::{
@@ -29,9 +33,6 @@ use crate::tokenizers::{
use self::detect::LanguageDetector;
pub mod detect;
pub mod stemmer;
pub type LanguageTokenizer<'x> = Box<dyn Iterator<Item = Token<Cow<'x, str>>> + 'x>;
impl Language {
@@ -131,57 +132,9 @@ pub enum Language {
impl Language {
pub fn from_iso_639(code: &str) -> Option<Self> {
match code.split_once('-').map(|c| c.0).unwrap_or(code) {
"en" => Language::English,
"es" => Language::Spanish,
"pt" => Language::Portuguese,
"it" => Language::Italian,
"fr" => Language::French,
"de" => Language::German,
"ru" => Language::Russian,
"zh" => Language::Mandarin,
"ja" => Language::Japanese,
"ar" => Language::Arabic,
"hi" => Language::Hindi,
"ko" => Language::Korean,
"bn" => Language::Bengali,
"he" => Language::Hebrew,
"ur" => Language::Urdu,
"fa" => Language::Persian,
"ml" => Language::Malayalam,
"or" => Language::Oriya,
"my" => Language::Burmese,
"ne" => Language::Nepali,
"si" => Language::Sinhalese,
"km" => Language::Khmer,
"tk" => Language::Turkmen,
"am" => Language::Amharic,
"az" => Language::Azerbaijani,
"id" => Language::Indonesian,
"te" => Language::Telugu,
"ta" => Language::Tamil,
"vi" => Language::Vietnamese,
"gu" => Language::Gujarati,
"pa" => Language::Punjabi,
"uz" => Language::Uzbek,
"hy" => Language::Armenian,
"ka" => Language::Georgian,
"la" => Language::Latin,
"sl" => Language::Slovene,
"hr" => Language::Croatian,
"sr" => Language::Serbian,
"mk" => Language::Macedonian,
"lt" => Language::Lithuanian,
"lv" => Language::Latvian,
"et" => Language::Estonian,
"tl" => Language::Tagalog,
"af" => Language::Afrikaans,
"zu" => Language::Zulu,
"sn" => Language::Shona,
"ak" => Language::Akan,
_ => return None,
}
.into()
LANG_ISO
.get(code.split_once('-').map(|c| c.0).unwrap_or(code))
.copied()
}
}
@@ -200,3 +153,53 @@ impl Language {
}
}
}
static LANG_ISO: phf::Map<&'static str, Language> = phf::phf_map! {
"en" => Language::English,
"es" => Language::Spanish,
"pt" => Language::Portuguese,
"it" => Language::Italian,
"fr" => Language::French,
"de" => Language::German,
"ru" => Language::Russian,
"zh" => Language::Mandarin,
"ja" => Language::Japanese,
"ar" => Language::Arabic,
"hi" => Language::Hindi,
"ko" => Language::Korean,
"bn" => Language::Bengali,
"he" => Language::Hebrew,
"ur" => Language::Urdu,
"fa" => Language::Persian,
"ml" => Language::Malayalam,
"or" => Language::Oriya,
"my" => Language::Burmese,
"ne" => Language::Nepali,
"si" => Language::Sinhalese,
"km" => Language::Khmer,
"tk" => Language::Turkmen,
"am" => Language::Amharic,
"az" => Language::Azerbaijani,
"id" => Language::Indonesian,
"te" => Language::Telugu,
"ta" => Language::Tamil,
"vi" => Language::Vietnamese,
"gu" => Language::Gujarati,
"pa" => Language::Punjabi,
"uz" => Language::Uzbek,
"hy" => Language::Armenian,
"ka" => Language::Georgian,
"la" => Language::Latin,
"sl" => Language::Slovene,
"hr" => Language::Croatian,
"sr" => Language::Serbian,
"mk" => Language::Macedonian,
"lt" => Language::Lithuanian,
"lv" => Language::Latvian,
"et" => Language::Estonian,
"tl" => Language::Tagalog,
"af" => Language::Afrikaans,
"zu" => Language::Zulu,
"sn" => Language::Shona,
"ak" => Language::Akan,
};
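Editorial sketch (not part of this commit): the region subtag is stripped before the phf lookup, so plain and regioned codes resolve to the same language:

assert!(matches!(Language::from_iso_639("en"), Some(Language::English)));
assert!(matches!(Language::from_iso_639("en-US"), Some(Language::English)));
assert!(Language::from_iso_639("xx").is_none());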


@@ -70,7 +70,7 @@ impl<'x> Iterator for Stemmer<'x> {
}
}
static STEMMER_MAP: &[Option<Algorithm>] = &[
pub static STEMMER_MAP: &[Option<Algorithm>] = &[
None, // Esperanto = 0,
Some(Algorithm::English), // English = 1,
Some(Algorithm::Russian), // Russian = 2,

File diff suppressed because it is too large


@@ -1,59 +1,52 @@
use ahash::AHashSet;
pub mod bayes;
pub mod language;
pub mod tokenizers;
pub mod transformers;
#[derive(Debug, Clone, Default)]
pub struct PublicSuffix {
pub suffixes: AHashSet<String>,
pub exceptions: AHashSet<String>,
pub wildcards: Vec<String>,
}
impl PublicSuffix {
pub fn contains(&self, suffix: &str) -> bool {
self.suffixes.contains(suffix)
|| (!self.exceptions.contains(suffix)
&& self.wildcards.iter().any(|w| suffix.ends_with(w)))
}
}
#[cfg(test)]
mod test {
use std::fs;
use utils::suffixlist::PublicSuffix;
use crate::{
bayes::{bloom::BloomHasher, BayesClassifier, BayesModel},
transformers::osb::{OsbToken, OsbTokenizer},
bayes::{tokenize::BayesTokenizer, BayesClassifier, BayesModel},
tokenizers::osb::{OsbToken, OsbTokenizer},
};
#[test]
#[ignore]
fn train() {
let db = fs::read_to_string("spam_or_not_spam.csv").unwrap();
let db =
fs::read_to_string("/Users/me/code/mail-server/_ignore/spam_or_not_spam.csv").unwrap();
let mut bayes = BayesModel::default();
let suffixes = PublicSuffix::default();
for line in db.lines() {
let (text, is_spam) = line.rsplit_once(',').unwrap();
let is_spam = is_spam == "1";
bayes.train(
BloomHasher::new(OsbTokenizer::new(text.split_ascii_whitespace(), 5)),
OsbTokenizer::new(BayesTokenizer::new(text, &suffixes), 5),
is_spam,
);
}
println!("Ham: {} Spam: {}", bayes.ham_learns, bayes.spam_learns,);
fs::write("spam_or_not_spam.bin", bincode::serialize(&bayes).unwrap()).unwrap();
fs::write(
"/Users/me/code/mail-server/_ignore/spam_or_not_spam.bin",
bincode::serialize(&bayes).unwrap(),
)
.unwrap();
}
#[test]
#[ignore]
fn classify() {
let model: BayesModel =
bincode::deserialize(&fs::read("spam_or_not_spam.bin").unwrap()).unwrap();
let model: BayesModel = bincode::deserialize(
&fs::read("/Users/me/code/mail-server/_ignore/spam_or_not_spam.bin").unwrap(),
)
.unwrap();
let bayes = BayesClassifier::new();
let suffixes = PublicSuffix::default();
for text in [
"i am attaching to this email a presentation to integrate the spreadsheet into our server",
@@ -65,7 +58,7 @@ mod test {
"{:?} -> {}",
text,
bayes
.classify(BloomHasher::new(OsbTokenizer::new(text.split_ascii_whitespace(), 5)).filter_map(|x| model.weights.get(&x.inner).map(|w| {
.classify(OsbTokenizer::new(BayesTokenizer::new(text, &suffixes), 5).filter_map(|x| model.weights.get(&x.inner).map(|w| {
OsbToken {
idx: x.idx,
inner: *w,


@@ -29,7 +29,7 @@ use super::{InnerToken, Token};
use lazy_static::lazy_static;
lazy_static! {
static ref JIEBA: Jieba = Jieba::new();
pub static ref JIEBA: Jieba = Jieba::new();
}
pub struct ChineseTokenizer<'x, T, I>


@@ -23,6 +23,7 @@
pub mod chinese;
pub mod japanese;
pub mod osb;
pub mod space;
pub mod types;
pub mod word;


@@ -0,0 +1,358 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{borrow::Cow, iter::Peekable};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OsbToken<T> {
pub inner: T,
pub idx: usize,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Gram<'x> {
Uni { t1: &'x str },
Bi { t1: &'x str, t2: &'x str },
}
pub struct OsbTokenizer<'x, I, R>
where
I: Iterator<Item = Cow<'x, str>>,
R: for<'y> From<Gram<'y>> + 'static,
{
iter: Peekable<I>,
buf: Vec<Option<Cow<'x, str>>>,
window_size: usize,
window_pos: usize,
window_idx: usize,
phantom: std::marker::PhantomData<R>,
}
impl<'x, I, R> OsbTokenizer<'x, I, R>
where
I: Iterator<Item = Cow<'x, str>>,
R: for<'y> From<Gram<'y>> + 'static,
{
pub fn new(iter: I, window_size: usize) -> Self {
Self {
iter: iter.peekable(),
buf: vec![None; window_size],
window_pos: 0,
window_idx: 0,
window_size,
phantom: std::marker::PhantomData,
}
}
}
impl<'x, I, R> Iterator for OsbTokenizer<'x, I, R>
where
I: Iterator<Item = Cow<'x, str>>,
R: for<'y> From<Gram<'y>> + 'static,
{
type Item = OsbToken<R>;
fn next(&mut self) -> Option<Self::Item> {
let end_pos = (self.window_pos + self.window_idx) % self.window_size;
if self.buf[end_pos].is_none() {
self.buf[end_pos] = self.iter.next();
}
let t1 = self.buf[self.window_pos % self.window_size].as_deref()?;
let token = OsbToken {
inner: R::from(if self.window_idx != 0 {
Gram::Bi {
t1,
t2: self.buf[end_pos].as_deref()?,
}
} else {
Gram::Uni { t1 }
}),
idx: self.window_idx,
};
// Increment window index
self.window_idx += 1;
if self.window_idx == self.window_size
|| (self.iter.peek().is_none()
&& self.buf[(self.window_pos + self.window_idx) % self.window_size].is_none())
{
self.buf[self.window_pos % self.window_size] = None;
self.window_idx = 0;
self.window_pos += 1;
}
Some(token)
}
}
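// Editorial note (not part of this commit): with window_size = 5, each input
// token yields one unigram (idx 0) plus up to four skip-bigrams (idx 1..=4)
// pairing it with each of the next four tokens, as the test below spells out.
// Making the item type generic over R: From<Gram> lets callers hash grams on
// the fly (e.g. into TokenHash) without borrowing the tokenizer's buffer.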
#[cfg(test)]
mod test {
use std::borrow::Cow;
use crate::tokenizers::osb::{Gram, OsbToken};
impl From<Gram<'_>> for String {
fn from(value: Gram<'_>) -> Self {
match value {
Gram::Uni { t1 } => t1.to_string(),
Gram::Bi { t1, t2 } => format!("{t1} {t2}"),
}
}
}
#[test]
fn osb_tokenizer() {
assert_eq!(
super::OsbTokenizer::new(
"The quick brown fox jumps over the lazy dog and the lazy cat"
.split_ascii_whitespace()
.map(Cow::from),
5,
)
.collect::<Vec<_>>(),
vec![
OsbToken {
inner: "The".to_string(),
idx: 0
},
OsbToken {
inner: "The quick".to_string(),
idx: 1
},
OsbToken {
inner: "The brown".to_string(),
idx: 2
},
OsbToken {
inner: "The fox".to_string(),
idx: 3
},
OsbToken {
inner: "The jumps".to_string(),
idx: 4
},
OsbToken {
inner: "quick".to_string(),
idx: 0
},
OsbToken {
inner: "quick brown".to_string(),
idx: 1
},
OsbToken {
inner: "quick fox".to_string(),
idx: 2
},
OsbToken {
inner: "quick jumps".to_string(),
idx: 3
},
OsbToken {
inner: "quick over".to_string(),
idx: 4
},
OsbToken {
inner: "brown".to_string(),
idx: 0
},
OsbToken {
inner: "brown fox".to_string(),
idx: 1
},
OsbToken {
inner: "brown jumps".to_string(),
idx: 2
},
OsbToken {
inner: "brown over".to_string(),
idx: 3
},
OsbToken {
inner: "brown the".to_string(),
idx: 4
},
OsbToken {
inner: "fox".to_string(),
idx: 0
},
OsbToken {
inner: "fox jumps".to_string(),
idx: 1
},
OsbToken {
inner: "fox over".to_string(),
idx: 2
},
OsbToken {
inner: "fox the".to_string(),
idx: 3
},
OsbToken {
inner: "fox lazy".to_string(),
idx: 4
},
OsbToken {
inner: "jumps".to_string(),
idx: 0
},
OsbToken {
inner: "jumps over".to_string(),
idx: 1
},
OsbToken {
inner: "jumps the".to_string(),
idx: 2
},
OsbToken {
inner: "jumps lazy".to_string(),
idx: 3
},
OsbToken {
inner: "jumps dog".to_string(),
idx: 4
},
OsbToken {
inner: "over".to_string(),
idx: 0
},
OsbToken {
inner: "over the".to_string(),
idx: 1
},
OsbToken {
inner: "over lazy".to_string(),
idx: 2
},
OsbToken {
inner: "over dog".to_string(),
idx: 3
},
OsbToken {
inner: "over and".to_string(),
idx: 4
},
OsbToken {
inner: "the".to_string(),
idx: 0
},
OsbToken {
inner: "the lazy".to_string(),
idx: 1
},
OsbToken {
inner: "the dog".to_string(),
idx: 2
},
OsbToken {
inner: "the and".to_string(),
idx: 3
},
OsbToken {
inner: "the the".to_string(),
idx: 4
},
OsbToken {
inner: "lazy".to_string(),
idx: 0
},
OsbToken {
inner: "lazy dog".to_string(),
idx: 1
},
OsbToken {
inner: "lazy and".to_string(),
idx: 2
},
OsbToken {
inner: "lazy the".to_string(),
idx: 3
},
OsbToken {
inner: "lazy lazy".to_string(),
idx: 4
},
OsbToken {
inner: "dog".to_string(),
idx: 0
},
OsbToken {
inner: "dog and".to_string(),
idx: 1
},
OsbToken {
inner: "dog the".to_string(),
idx: 2
},
OsbToken {
inner: "dog lazy".to_string(),
idx: 3
},
OsbToken {
inner: "dog cat".to_string(),
idx: 4
},
OsbToken {
inner: "and".to_string(),
idx: 0
},
OsbToken {
inner: "and the".to_string(),
idx: 1
},
OsbToken {
inner: "and lazy".to_string(),
idx: 2
},
OsbToken {
inner: "and cat".to_string(),
idx: 3
},
OsbToken {
inner: "the".to_string(),
idx: 0
},
OsbToken {
inner: "the lazy".to_string(),
idx: 1
},
OsbToken {
inner: "the cat".to_string(),
idx: 2
},
OsbToken {
inner: "lazy".to_string(),
idx: 0
},
OsbToken {
inner: "lazy cat".to_string(),
idx: 1
},
OsbToken {
inner: "cat".to_string(),
idx: 0
}
]
);
}
}


@@ -23,7 +23,7 @@
use std::str::CharIndices;
use crate::PublicSuffix;
use utils::suffixlist::PublicSuffix;
use super::Token;
@@ -31,35 +31,39 @@ pub struct TypesTokenizer<'x, 'y> {
text: &'x str,
suffixes: &'y PublicSuffix,
iter: CharIndices<'x>,
tokens: Vec<Token<TokenType<'x>>>,
tokens: Vec<Token<TokenType<&'x str>>>,
peek_pos: usize,
last_ch_is_space: bool,
last_token_is_dot: bool,
eof: bool,
tokenize_urls: bool,
tokenize_urls_without_scheme: bool,
tokenize_emails: bool,
tokenize_numbers: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType<'x> {
Alphabetic(&'x str),
Integer(&'x str),
Alphanumeric(&'x str),
Hexadecimal(&'x str),
pub enum TokenType<T> {
Alphabetic(T),
Integer(T),
Alphanumeric(T),
Hexadecimal(T),
Other(char),
Punctuation(char),
Space,
// Detected types
Url(&'x str),
UrlNoScheme(&'x str),
UrlNoHost(&'x str),
Email(&'x str),
Float(&'x str),
Url(T),
UrlNoScheme(T),
UrlNoHost(T),
Email(T),
Float(T),
}
impl Copy for Token<TokenType<'_>> {}
impl Copy for Token<TokenType<&'_ str>> {}
impl<'x, 'y> Iterator for TypesTokenizer<'x, 'y> {
type Item = Token<TokenType<'x>>;
type Item = Token<TokenType<&'x str>>;
fn next(&mut self) -> Option<Self::Item> {
let token = self.peek()?;
@@ -67,7 +71,8 @@ impl<'x, 'y> Iterator for TypesTokenizer<'x, 'y> {
self.last_token_is_dot = matches!(token.word, TokenType::Punctuation('.'));
// Try parsing URL with scheme
if matches!(
if self.tokenize_urls
&& matches!(
token.word,
TokenType::Alphabetic(t) | TokenType::Hexadecimal(t)
if t.len() <= 8 && t.chars().all(|c| c.is_ascii()))
@@ -82,7 +87,8 @@ impl<'x, 'y> Iterator for TypesTokenizer<'x, 'y> {
}
// Try parsing email
if token.word.is_email_atom()
if self.tokenize_emails
&& token.word.is_email_atom()
&& self.peek_has_tokens(
&[TokenType::Punctuation('@'), TokenType::Punctuation('.')],
TokenType::Space,
@@ -97,7 +103,8 @@ impl<'x, 'y> Iterator for TypesTokenizer<'x, 'y> {
}
// Try parsing URL without scheme
if token.word.is_domain_atom(true)
if self.tokenize_urls_without_scheme
&& token.word.is_domain_atom(true)
&& self.peek_has_tokens(&[TokenType::Punctuation('.')], TokenType::Space)
{
if let Some(url) = self.try_parse_url(None) {
@@ -109,7 +116,7 @@ impl<'x, 'y> Iterator for TypesTokenizer<'x, 'y> {
}
// Try parsing currencies and floating point numbers
if !last_is_dot {
if self.tokenize_numbers && !last_is_dot {
if let Some(num) = self.try_parse_number() {
self.peek_advance();
return Some(num);
@@ -132,9 +139,33 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
suffixes,
last_ch_is_space: false,
last_token_is_dot: false,
tokenize_urls: true,
tokenize_urls_without_scheme: true,
tokenize_emails: true,
tokenize_numbers: true,
}
}
pub fn tokenize_urls(mut self, tokenize: bool) -> Self {
self.tokenize_urls = tokenize;
self
}
pub fn tokenize_urls_without_scheme(mut self, tokenize: bool) -> Self {
self.tokenize_urls_without_scheme = tokenize;
self
}
pub fn tokenize_emails(mut self, tokenize: bool) -> Self {
self.tokenize_emails = tokenize;
self
}
pub fn tokenize_numbers(mut self, tokenize: bool) -> Self {
self.tokenize_numbers = tokenize;
self
}
fn consume(&mut self) -> bool {
let mut has_alpha = false;
let mut has_number = false;
@@ -212,7 +243,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
}
}
fn next_(&mut self) -> Option<Token<TokenType<'x>>> {
fn next_(&mut self) -> Option<Token<TokenType<&'x str>>> {
if self.tokens.is_empty() && !self.eof {
self.consume();
}
@@ -223,7 +254,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
}
}
fn peek(&mut self) -> Option<Token<TokenType<'x>>> {
fn peek(&mut self) -> Option<Token<TokenType<&'x str>>> {
while self.tokens.len() <= self.peek_pos && !self.eof {
self.consume();
}
@@ -244,7 +275,11 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
self.peek_pos = 0;
}
fn peek_has_tokens(&mut self, tokens: &[TokenType<'_>], stop_token: TokenType<'_>) -> bool {
fn peek_has_tokens(
&mut self,
tokens: &[TokenType<&'_ str>],
stop_token: TokenType<&'_ str>,
) -> bool {
let mut tokens = tokens.iter().copied();
let mut token = tokens.next().unwrap();
while let Some(t) = self.peek() {
@@ -266,8 +301,8 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
fn try_parse_url(
&mut self,
scheme_token: Option<Token<TokenType<'_>>>,
) -> Option<Token<TokenType<'x>>> {
scheme_token: Option<Token<TokenType<&'_ str>>>,
) -> Option<Token<TokenType<&'x str>>> {
let (has_scheme, allow_blank_host) = scheme_token.as_ref().map_or((false, false), |t| {
(
true,
@@ -480,7 +515,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
.into()
}
fn try_parse_email(&mut self) -> Option<Token<TokenType<'x>>> {
fn try_parse_email(&mut self) -> Option<Token<TokenType<&'x str>>> {
// Start token is a valid local part atom
let start_token = self.peek()?;
let mut last_is_dot = false;
@@ -615,7 +650,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
None
}
fn try_parse_number(&mut self) -> Option<Token<TokenType<'x>>> {
fn try_parse_number(&mut self) -> Option<Token<TokenType<&'x str>>> {
self.peek_rewind();
let mut start_pos = usize::MAX;
let mut end_pos = usize::MAX;
@@ -698,7 +733,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
}
}
impl<'x> TokenType<'x> {
impl<T> TokenType<T> {
fn is_email_atom(&self) -> bool {
matches!(
self,
@@ -744,7 +779,8 @@ impl<'x> TokenType<'x> {
#[cfg(test)]
mod test {
use crate::PublicSuffix;
use utils::suffixlist::PublicSuffix;
use super::{TokenType, TypesTokenizer};


@@ -1,24 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
pub mod osb;


@@ -1,467 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::iter::Peekable;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OsbToken<T> {
pub inner: T,
pub idx: usize,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Gram<'x> {
Uni { t1: &'x str },
Bi { t1: &'x str, t2: &'x str },
}
pub struct OsbTokenizer<'x, I>
where
I: Iterator<Item = &'x str>,
{
iter: Peekable<I>,
buf: Vec<Option<&'x str>>,
window_size: usize,
window_pos: usize,
window_idx: usize,
}
impl<'x, I> OsbTokenizer<'x, I>
where
I: Iterator<Item = &'x str>,
{
pub fn new(iter: I, window_size: usize) -> Self {
Self {
iter: iter.peekable(),
buf: vec![None; window_size],
window_pos: 0,
window_idx: 0,
window_size,
}
}
}
impl<'x, I> Iterator for OsbTokenizer<'x, I>
where
I: Iterator<Item = &'x str>,
{
type Item = OsbToken<Gram<'x>>;
fn next(&mut self) -> Option<Self::Item> {
let end_pos = (self.window_pos + self.window_idx) % self.window_size;
if self.buf[end_pos].is_none() {
self.buf[end_pos] = self.iter.next();
}
let t1 = self.buf[self.window_pos % self.window_size]?;
let token = OsbToken {
inner: if self.window_idx != 0 {
Gram::Bi {
t1,
t2: self.buf[end_pos]?,
}
} else {
Gram::Uni { t1 }
},
idx: self.window_idx,
};
// Increment window
self.window_idx += 1;
if self.window_idx == self.window_size
|| (self.iter.peek().is_none()
&& self.buf[(self.window_pos + self.window_idx) % self.window_size].is_none())
{
self.buf[self.window_pos % self.window_size] = None;
self.window_idx = 0;
self.window_pos += 1;
}
Some(token)
}
}
#[cfg(test)]
mod test {
use crate::transformers::osb::{Gram, OsbToken};
#[test]
fn osb_tokenizer() {
assert_eq!(
super::OsbTokenizer::new(
"The quick brown fox jumps over the lazy dog and the lazy cat"
.split_ascii_whitespace(),
5
)
.collect::<Vec<_>>(),
vec![
OsbToken {
inner: Gram::Uni { t1: "The" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "The",
t2: "quick"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "The",
t2: "brown"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "The",
t2: "fox"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "The",
t2: "jumps"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "quick" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "quick",
t2: "brown"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "quick",
t2: "fox"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "quick",
t2: "jumps"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "quick",
t2: "over"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "brown" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "brown",
t2: "fox"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "brown",
t2: "jumps"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "brown",
t2: "over"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "brown",
t2: "the"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "fox" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "fox",
t2: "jumps"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "fox",
t2: "over"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "fox",
t2: "the"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "fox",
t2: "lazy"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "jumps" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "jumps",
t2: "over"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "jumps",
t2: "the"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "jumps",
t2: "lazy"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "jumps",
t2: "dog"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "over" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "over",
t2: "the"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "over",
t2: "lazy"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "over",
t2: "dog"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "over",
t2: "and"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "the" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "lazy"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "dog"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "and"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "the"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "lazy" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "lazy",
t2: "dog"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "lazy",
t2: "and"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "lazy",
t2: "the"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "lazy",
t2: "lazy"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "dog" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "dog",
t2: "and"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "dog",
t2: "the"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "dog",
t2: "lazy"
},
idx: 3
},
OsbToken {
inner: Gram::Bi {
t1: "dog",
t2: "cat"
},
idx: 4
},
OsbToken {
inner: Gram::Uni { t1: "and" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "and",
t2: "the"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "and",
t2: "lazy"
},
idx: 2
},
OsbToken {
inner: Gram::Bi {
t1: "and",
t2: "cat"
},
idx: 3
},
OsbToken {
inner: Gram::Uni { t1: "the" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "lazy"
},
idx: 1
},
OsbToken {
inner: Gram::Bi {
t1: "the",
t2: "cat"
},
idx: 2
},
OsbToken {
inner: Gram::Uni { t1: "lazy" },
idx: 0
},
OsbToken {
inner: Gram::Bi {
t1: "lazy",
t2: "cat"
},
idx: 1
},
OsbToken {
inner: Gram::Uni { t1: "cat" },
idx: 0
}
]
);
}
}


@@ -13,6 +13,7 @@ resolver = "2"
[dependencies]
utils = { path = "../utils" }
nlp = { path = "../nlp" }
directory = { path = "../directory" }
mail-auth = { git = "https://github.com/stalwartlabs/mail-auth" }
mail-send = { git = "https://github.com/stalwartlabs/mail-send", default-features = false, features = ["cram-md5", "skip-ehlo"] }
@@ -50,7 +51,6 @@ num_cpus = "1.15.0"
lazy_static = "1.4"
whatlang = "0.16"
imagesize = "0.12"
linkify = "0.10"
idna = "0.4"
decancer = "1.6.1"
unicode-security = "0.1.0"


@@ -39,7 +39,7 @@ use std::{
time::Duration,
};
use ahash::{AHashMap, AHashSet};
use ahash::AHashMap;
use directory::{Directory, DirectoryConfig, Lookup};
use mail_auth::{
common::crypto::{Ed25519Key, RsaKey, Sha256},
@@ -541,13 +541,6 @@ pub enum VerifyStrategy {
Disable,
}
#[derive(Debug, Clone, Default)]
pub struct PublicSuffix {
pub suffixes: AHashSet<String>,
pub exceptions: AHashSet<String>,
pub wildcards: Vec<String>,
}
#[derive(Default)]
pub struct ConfigContext<'x> {
pub servers: &'x [Server],


@@ -34,9 +34,7 @@ use mail_auth::{
};
use crate::{core::Resolvers, outbound::dane::DnssecResolver};
use utils::config::Config;
use super::PublicSuffix;
use utils::{config::Config, suffixlist::PublicSuffix};
pub trait ConfigResolver {
fn build_resolvers(&self) -> super::Result<Resolvers>;
@@ -108,9 +106,9 @@ impl ConfigResolver for Config {
}
fn parse_public_suffix(&self) -> super::Result<PublicSuffix> {
let mut ps = PublicSuffix::default();
let mut has_values = false;
for (_, value) in self.values("resolver.public-suffix") {
has_values = true;
let bytes = if value.starts_with("https://") || value.starts_with("http://") {
match tokio::task::block_in_place(|| {
reqwest::blocking::get(value).and_then(|r| {
@@ -175,20 +173,7 @@ match String::from_utf8(bytes) {
match String::from_utf8(bytes) {
Ok(list) => {
for line in list.lines() {
let line = line.trim().to_lowercase();
if !line.starts_with("//") {
if let Some(domain) = line.strip_prefix('*') {
ps.wildcards.push(domain.to_string());
} else if let Some(domain) = line.strip_prefix('!') {
ps.exceptions.insert(domain.to_string());
} else {
ps.suffixes.insert(line.to_string());
}
}
}
return Ok(ps);
return Ok(PublicSuffix::from(list.as_str()));
}
Err(err) => {
tracing::warn!(
@@ -200,16 +185,10 @@ impl ConfigResolver for Config {
}
}
tracing::warn!("Failed to parse public suffixes from any source.");
if has_values {
tracing::warn!("Failed to parse public suffixes from any source.");
}
Ok(ps)
}
}
impl PublicSuffix {
pub fn contains(&self, suffix: &str) -> bool {
self.suffixes.contains(suffix)
|| (!self.exceptions.contains(suffix)
&& self.wildcards.iter().any(|w| suffix.ends_with(w)))
Ok(PublicSuffix::default())
}
}


@@ -21,25 +21,33 @@
* for more details.
*/
use std::time::Duration;
use std::{sync::Arc, time::Duration};
use directory::Lookup;
use nlp::bayes::{cache::BayesTokenCache, BayesClassifier};
use sieve::{compiler::grammar::Capability, Compiler, Runtime};
use crate::{
core::{SieveConfig, SieveCore},
scripts::{functions::register_functions, plugins::RegisterSievePlugins},
};
use utils::config::{utils::AsKey, Config};
use utils::{
config::{utils::AsKey, Config},
suffixlist::PublicSuffix,
};
use super::{resolver::ConfigResolver, ConfigContext, PublicSuffix};
use super::{resolver::ConfigResolver, ConfigContext};
pub trait ConfigSieve {
fn parse_sieve(&self, ctx: &mut ConfigContext) -> super::Result<SieveCore>;
}
#[derive(Clone, Default)]
pub struct SieveContext {
pub psl: PublicSuffix,
pub bayes_classify: BayesClassifier,
pub bayes_cache: BayesTokenCache,
pub lookup_classify: Arc<Lookup>,
pub lookup_train: Arc<Lookup>,
}
impl ConfigSieve for Config {
@@ -48,6 +56,29 @@ impl ConfigSieve for Config {
let mut fnc_map = register_functions().register_plugins();
let sieve_ctx = SieveContext {
psl: self.parse_public_suffix()?,
bayes_classify: BayesClassifier {
min_token_hits: self.property_or_static("bayes.min-token-hits", "2")?,
min_tokens: self.property_or_static("bayes.min-tokens", "11")?,
min_prob_strength: self.property_or_static("bayes.min-prob-strength", "0.05")?,
min_learns: self.property_or_static("bayes.min-learns", "200")?,
},
bayes_cache: BayesTokenCache::new(
self.property_or_static("bayes.cache.capacity", "8192")?,
self.property_or_static("bayes.cache.ttl.positive", "1h")?,
self.property_or_static("bayes.cache.ttl.negative", "1h")?,
),
lookup_classify: ctx
.directory
.lookups
.get("bayes.tokens.classify")
.ok_or("No lookup found for key bayes.tokens.classify.".to_string())?
.clone(),
lookup_train: ctx
.directory
.lookups
.get("bayes.tokens.train")
.ok_or("No lookup found for key bayes.tokens.train.".to_string())?
.clone(),
};
// Allocate compiler and runtime
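Written out, the defaults wired in above describe the classifier's shape (a sketch only; the four field names come straight from this hunk, and the numeric types are assumed to match the "2"/"11"/"0.05"/"200" default strings):

use nlp::bayes::BayesClassifier;

// Sketch of the default classifier produced when no bayes.* keys are set.
let classify = BayesClassifier {
    min_token_hits: 2,       // bayes.min-token-hits
    min_tokens: 11,          // bayes.min-tokens
    min_prob_strength: 0.05, // bayes.min-prob-strength
    min_learns: 200,         // bayes.min-learns
};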

View file

@ -24,7 +24,6 @@
use core::panic;
use std::{sync::Arc, time::Duration};
use ahash::AHashMap;
use directory::Lookup;
use mail_auth::common::headers::HeaderWriter;
use sieve::{
@ -68,8 +67,6 @@ impl SMTP {
let mut modifications = vec![];
let mut keep_id = usize::MAX;
let mut plugin_data = AHashMap::new();
// Start event loop
while let Some(result) = instance.run(input) {
match result {
@ -125,7 +122,6 @@ impl SMTP {
span: &span,
handle: &handle,
core: self,
data: &mut plugin_data,
message: instance.message(),
arguments,
},

View file

@ -21,11 +21,12 @@
* for more details.
*/
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
use sieve::{runtime::Variable, Context};
use crate::{config::scripts::SieveContext, scripts::functions::url::tokenize_email};
use crate::config::scripts::SieveContext;
use super::{html::html_to_tokens, url::tokenize_url, ApplyString};
use super::{html::html_to_tokens, ApplyString};
pub fn fn_trim<'x>(_: &'x Context<'x, SieveContext>, v: Vec<Variable<'x>>) -> Variable<'x> {
v[0].transform(|s| Variable::StringRef(s.trim()))
@ -106,13 +107,49 @@ pub fn fn_tokenize<'x>(
ctx: &'x Context<'x, SieveContext>,
mut v: Vec<Variable<'x>>,
) -> Variable<'x> {
match v[1].to_cow().as_ref() {
"html" => html_to_tokens(v[0].to_cow().as_ref()).into(),
"words" => tokenize_words(&v[0]),
"uri" | "url" => tokenize_url(ctx, v.remove(0), false),
"uri_strict" | "url_strict" => tokenize_url(ctx, v.remove(0), true),
"email" => tokenize_email(v.remove(0)),
_ => Variable::default(),
let (urls, urls_without_scheme, emails) = match v[1].to_cow().as_ref() {
"html" => return html_to_tokens(v[0].to_cow().as_ref()).into(),
"words" => return tokenize_words(&v[0]),
"uri" | "url" => (true, true, true),
"uri_strict" | "url_strict" => (true, false, false),
"email" => (false, false, true),
_ => return Variable::default(),
};
match v.remove(0) {
Variable::StringRef(text) => TypesTokenizer::new(text, &ctx.context().psl)
.tokenize_numbers(false)
.tokenize_urls(urls)
.tokenize_urls_without_scheme(urls_without_scheme)
.tokenize_emails(emails)
.filter_map(|t| match t.word {
TokenType::Url(text) if urls => Variable::StringRef(text).into(),
TokenType::UrlNoScheme(text) if urls_without_scheme => {
Variable::String(format!("https://{text}")).into()
}
TokenType::Email(text) if emails => Variable::StringRef(text).into(),
_ => None,
})
.collect::<Vec<_>>()
.into(),
v @ (Variable::String(_) | Variable::Array(_) | Variable::ArrayRef(_)) => {
TypesTokenizer::new(v.to_cow().as_ref(), &ctx.context().psl)
.tokenize_numbers(false)
.tokenize_urls(urls)
.tokenize_urls_without_scheme(urls_without_scheme)
.tokenize_emails(emails)
.filter_map(|t| match t.word {
TokenType::Url(text) if urls => Variable::String(text.to_string()).into(),
TokenType::UrlNoScheme(text) if urls_without_scheme => {
Variable::String(format!("https://{text}")).into()
}
TokenType::Email(text) if emails => Variable::String(text.to_string()).into(),
_ => None,
})
.collect::<Vec<_>>()
.into()
}
v => v,
}
}
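A minimal sketch of the pipeline those mode flags drive, assuming a PublicSuffix value psl is in scope; "url" mode enables all three switches, so scheme-less domains are normalized with an https:// prefix:

use nlp::tokenizers::types::{TokenType, TypesTokenizer};

// Sketch: extract URLs, scheme-less URLs and e-mail addresses from text.
let tokens: Vec<String> = TypesTokenizer::new("see example.com or mail a@b.org", &psl)
    .tokenize_numbers(false)
    .tokenize_urls(true)
    .tokenize_urls_without_scheme(true)
    .tokenize_emails(true)
    .filter_map(|t| match t.word {
        TokenType::Url(s) => Some(s.to_string()),
        TokenType::UrlNoScheme(s) => Some(format!("https://{s}")), // add scheme
        TokenType::Email(s) => Some(s.to_string()),
        _ => None,
    })
    .collect();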

View file

@ -21,94 +21,13 @@
* for more details.
*/
use std::net::IpAddr;
use hyper::Uri;
use linkify::LinkKind;
use sieve::{runtime::Variable, Context};
use crate::config::scripts::SieveContext;
use super::ApplyString;
pub fn tokenize_url<'x>(
ctx: &'x Context<'x, SieveContext>,
v: Variable<'x>,
must_have_scheme: bool,
) -> Variable<'x> {
match v {
Variable::StringRef(text) => linkify::LinkFinder::new()
.url_must_have_scheme(must_have_scheme)
.kinds(&[LinkKind::Url])
.links(text.as_ref())
.filter_map(|url| filter_url(url.as_str(), must_have_scheme, ctx))
.collect::<Vec<_>>()
.into(),
v @ (Variable::String(_) | Variable::Array(_) | Variable::ArrayRef(_)) => {
linkify::LinkFinder::new()
.url_must_have_scheme(must_have_scheme)
.kinds(&[LinkKind::Url])
.links(v.to_cow().as_ref())
.filter_map(|url| {
filter_url(url.as_str(), must_have_scheme, ctx).map(|v| v.into_owned())
})
.collect::<Vec<_>>()
.into()
}
v => v,
}
}
pub fn tokenize_email(v: Variable<'_>) -> Variable<'_> {
match v {
Variable::StringRef(text) => linkify::LinkFinder::new()
.email_domain_must_have_dot(true)
.kinds(&[LinkKind::Email])
.links(text.as_ref())
.map(|email| Variable::StringRef(email.as_str()))
.collect::<Vec<_>>()
.into(),
v @ (Variable::String(_) | Variable::Array(_) | Variable::ArrayRef(_)) => {
linkify::LinkFinder::new()
.email_domain_must_have_dot(true)
.kinds(&[LinkKind::Email])
.links(v.to_cow().as_ref())
.map(|email| Variable::String(email.as_str().to_string()))
.collect::<Vec<_>>()
.into()
}
v => v,
}
}
fn filter_url<'x, 'y>(
url: &'x str,
must_have_scheme: bool,
ctx: &'y Context<'y, SieveContext>,
) -> Option<Variable<'x>> {
if must_have_scheme || url.contains("://") {
Some(Variable::StringRef(url))
} else {
// Filter out possible URLs without a valid TLD
let host = url.split_once('/').map_or(url, |(f, _)| f);
if (host
.as_bytes()
.first()
.map_or(true, |ch| ch.is_ascii_hexdigit())
&& host.parse::<IpAddr>().is_ok())
|| ctx
.context()
.psl
.contains(host.rsplit_once('.').map_or(host, |(_, tld)| tld))
|| host.ends_with(".onion")
{
Some(Variable::String(format!("https://{url}")))
} else {
None
}
}
}
pub fn fn_uri_part<'x>(_: &'x Context<'x, SieveContext>, v: Vec<Variable<'x>>) -> Variable<'x> {
let part = v[1].to_cow();
v[0].transform(|uri| {

View file

@ -0,0 +1,206 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use directory::{DatabaseColumn, Lookup};
use nlp::{
bayes::{cache::BayesTokenCache, tokenize::BayesTokenizer, BayesModel, TokenHash, Weights},
tokenizers::osb::{OsbToken, OsbTokenizer},
};
use sieve::{runtime::Variable, FunctionMap};
use tokio::runtime::Handle;
use crate::config::scripts::SieveContext;
use super::PluginContext;
pub fn register_train(plugin_id: u32, fnc_map: &mut FunctionMap<SieveContext>) {
fnc_map.set_external_function("bayes_train", plugin_id, 2);
}
pub fn register_untrain(plugin_id: u32, fnc_map: &mut FunctionMap<SieveContext>) {
fnc_map.set_external_function("bayes_untrain", plugin_id, 2);
}
pub fn register_classify(plugin_id: u32, fnc_map: &mut FunctionMap<SieveContext>) {
fnc_map.set_external_function("bayes_classify", plugin_id, 1);
}
pub fn exec_train(ctx: PluginContext<'_>) -> Variable<'static> {
train(ctx, true)
}
pub fn exec_untrain(ctx: PluginContext<'_>) -> Variable<'static> {
train(ctx, false)
}
fn train(ctx: PluginContext<'_>, is_train: bool) -> Variable<'static> {
let mut arguments = ctx.arguments.into_iter();
let text = arguments.next().unwrap().into_string();
if text.is_empty() {
return false.into();
}
let handle = ctx.handle;
let ctx = ctx.core.sieve.runtime.context();
// Train the model
let is_spam = arguments.next().unwrap().to_bool();
let mut model = BayesModel::default();
model.train(
OsbTokenizer::new(BayesTokenizer::new(text.as_ref(), &ctx.psl), 5),
is_spam,
);
if model.weights.is_empty() {
return false.into();
}
// Update weights and invalidate cache entries
let upsert = &ctx.lookup_train;
for (hash, weights) in model.weights {
let (s_weight, h_weight) = if is_train {
(weights.spam as i64, weights.ham as i64)
} else {
(-(weights.spam as i64), -(weights.ham as i64))
};
if handle
.block_on(upsert.lookup(&[
hash.h1.into(),
hash.h2.into(),
s_weight.into(),
h_weight.into(),
]))
.is_none()
{
return false.into();
}
ctx.bayes_cache.invalidate(&hash);
}
// Update training counts
let train_val = if is_train { 1i64 } else { -1i64 };
let (spam_count, ham_count) = if is_spam {
(train_val, 0i64)
} else {
(0i64, train_val)
};
if handle
.block_on(upsert.query(&[
0i64.into(),
0i64.into(),
spam_count.into(),
ham_count.into(),
]))
.is_none()
{
return false.into();
}
ctx.bayes_cache.invalidate(&TokenHash::default());
true.into()
}
pub fn exec_classify(ctx: PluginContext<'_>) -> Variable<'static> {
let mut arguments = ctx.arguments.into_iter();
let text = arguments.next().unwrap().into_string();
if text.is_empty() {
return 0.into();
}
let handle = ctx.handle;
let ctx = ctx.core.sieve.runtime.context();
let get_token = &ctx.lookup_classify;
// Obtain training counts
let (spam_learns, ham_learns) = if let Some(weights) =
ctx.bayes_cache
.get_or_update(TokenHash::default(), handle, get_token)
{
(weights.spam, weights.ham)
} else {
return 0.into();
};
// Make sure we have enough training data
if spam_learns < ctx.bayes_classify.min_learns || ham_learns < ctx.bayes_classify.min_learns {
return 0.into();
}
// Classify the text
ctx.bayes_classify
.classify(
OsbTokenizer::<_, TokenHash>::new(BayesTokenizer::new(text.as_ref(), &ctx.psl), 5)
.filter_map(|t| {
OsbToken {
inner: ctx.bayes_cache.get_or_update(t.inner, handle, get_token)?,
idx: t.idx,
}
.into()
}),
ham_learns,
spam_learns,
)
.unwrap_or_default()
.into()
}
trait LookupOrInsert {
fn get_or_update(
&self,
hash: TokenHash,
handle: &Handle,
get_token: &Lookup,
) -> Option<Weights>;
}
impl LookupOrInsert for BayesTokenCache {
fn get_or_update(
&self,
hash: TokenHash,
handle: &Handle,
get_token: &Lookup,
) -> Option<Weights> {
if let Some(weights) = self.get(&hash) {
weights.unwrap_or_default().into()
} else if let Some(result) =
handle.block_on(get_token.query(&[hash.h1.into(), hash.h2.into()]))
{
let mut result = result.into_iter();
match (result.next(), result.next()) {
(Some(DatabaseColumn::Integer(spam)), Some(DatabaseColumn::Integer(ham))) => {
let weights = Weights {
spam: spam as u32,
ham: ham as u32,
};
self.insert_positive(hash, weights);
weights
}
_ => {
self.insert_negative(hash);
Weights::default()
}
}
.into()
} else {
// Lookup failed; return None without caching
None
}
}
}
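Note the counter-row convention used throughout this plugin: the row addressed by TokenHash::default() (both hashes zero) holds the global spam/ham learn counts, which is why training bumps it last and classification reads it first. Stripped of the lookup store and cache, the training flow reduces to roughly this in-memory sketch (psl assumed in scope):

use nlp::{
    bayes::{tokenize::BayesTokenizer, BayesModel},
    tokenizers::osb::OsbTokenizer,
};

// Sketch: train an in-memory model on one spam sample using the same
// OSB token windows (size 5) that the plugin writes to the lookup store.
let mut model = BayesModel::default();
model.train(
    OsbTokenizer::new(BayesTokenizer::new("cheap meds, act now", &psl), 5),
    true, // is_spam
);
assert!(!model.weights.is_empty()); // one Weights entry per TokenHash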

View file

@ -21,6 +21,7 @@
* for more details.
*/
use directory::DatabaseColumn;
use sieve::{runtime::Variable, FunctionMap};
use crate::config::scripts::SieveContext;
@ -62,15 +63,20 @@ pub fn exec(ctx: PluginContext<'_>) -> Variable<'static> {
}
pub fn exec_map(ctx: PluginContext<'_>) -> Variable<'static> {
let lookup_id = ctx.arguments[0].to_cow();
let item = ctx.arguments[1].to_cow();
let mut arguments = ctx.arguments.into_iter();
let lookup_id = arguments.next().unwrap().into_cow();
let items = match arguments.next().unwrap() {
Variable::Array(l) => l.into_iter().map(DatabaseColumn::from).collect(),
Variable::ArrayRef(l) => l.iter().map(DatabaseColumn::from).collect(),
v => vec![DatabaseColumn::from(v)],
};
let span = ctx.span;
if !lookup_id.is_empty() && !item.is_empty() {
if !lookup_id.is_empty() && !items.is_empty() {
if let Some(lookup) = ctx.core.sieve.lookup.get(lookup_id.as_ref()) {
return ctx
.handle
.block_on(lookup.lookup(item.as_ref()))
.block_on(lookup.lookup(&items))
.unwrap_or_default();
} else {
tracing::warn!(

View file

@ -21,13 +21,13 @@
* for more details.
*/
pub mod bayes;
pub mod dns;
pub mod exec;
pub mod http;
pub mod lookup;
pub mod query;
use ahash::AHashMap;
use mail_parser::Message;
use sieve::{runtime::Variable, FunctionMap, Input};
use tokio::runtime::Handle;
@ -41,12 +41,11 @@ pub struct PluginContext<'x> {
pub span: &'x tracing::Span,
pub handle: &'x Handle,
pub core: &'x SMTP,
pub data: &'x mut AHashMap<String, String>,
pub message: &'x Message<'x>,
pub arguments: Vec<Variable<'static>>,
}
const PLUGINS_EXEC: [ExecPluginFnc; 7] = [
const PLUGINS_EXEC: [ExecPluginFnc; 10] = [
query::exec,
exec::exec,
lookup::exec,
@ -54,8 +53,11 @@ const PLUGINS_EXEC: [ExecPluginFnc; 7] = [
dns::exec,
dns::exec_exists,
http::exec_header,
bayes::exec_train,
bayes::exec_untrain,
bayes::exec_classify,
];
const PLUGINS_REGISTER: [RegisterPluginFnc; 7] = [
const PLUGINS_REGISTER: [RegisterPluginFnc; 10] = [
query::register,
exec::register,
lookup::register,
@ -63,6 +65,9 @@ const PLUGINS_REGISTER: [RegisterPluginFnc; 7] = [
dns::register,
dns::register_exists,
http::register_header,
bayes::register_train,
bayes::register_untrain,
bayes::register_classify,
];
pub trait RegisterSievePlugins {

View file

@ -22,7 +22,7 @@
*/
use crate::config::scripts::SieveContext;
use directory::QueryColumn;
use directory::DatabaseColumn;
use sieve::{runtime::Variable, FunctionMap};
use super::PluginContext;
@ -62,8 +62,12 @@ pub fn exec(ctx: PluginContext<'_>) -> Variable<'static> {
return false.into();
}
// Obtain parameters
let parameters = arguments.next().unwrap().into_string_array();
// Obtain arguments
let arguments = match arguments.next().unwrap() {
Variable::Array(l) => l.into_iter().map(DatabaseColumn::from).collect(),
Variable::ArrayRef(l) => l.iter().map(DatabaseColumn::from).collect(),
v => vec![DatabaseColumn::from(v)],
};
// Run query
if query
@ -71,12 +75,9 @@ pub fn exec(ctx: PluginContext<'_>) -> Variable<'static> {
.get(..6)
.map_or(false, |q| q.eq_ignore_ascii_case(b"SELECT"))
{
if let Ok(mut query_columns) = ctx.handle.block_on(directory.query(
&query,
&parameters.iter().map(String::as_str).collect::<Vec<_>>(),
)) {
if let Ok(mut query_columns) = ctx.handle.block_on(directory.query(&query, &arguments)) {
match query_columns.len() {
1 if !matches!(query_columns.first(), Some(QueryColumn::Null)) => {
1 if !matches!(query_columns.first(), Some(DatabaseColumn::Null)) => {
query_columns.pop().map(Variable::from).unwrap()
}
0 => Variable::default(),
@ -87,10 +88,7 @@ pub fn exec(ctx: PluginContext<'_>) -> Variable<'static> {
}
} else {
ctx.handle
.block_on(directory.lookup(
&query,
&parameters.iter().map(String::as_str).collect::<Vec<_>>(),
))
.block_on(directory.lookup(&query, &arguments))
.is_ok()
.into()
}

View file

@ -298,6 +298,18 @@ impl ParseValue for u64 {
}
}
impl ParseValue for f64 {
fn parse_value(key: impl AsKey, value: &str) -> super::Result<Self> {
value.parse().map_err(|_| {
format!(
"Invalid floating point value {:?} for property {:?}.",
value,
key.as_key()
)
})
}
}
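This unblocks fractional settings such as bayes.min-prob-strength. A sketch of the round trip, assuming &str implements AsKey like the neighboring impls and super::Result carries a String error:

// Sketch: "0.05" parses; malformed input yields an error naming the property.
assert_eq!(f64::parse_value("bayes.min-prob-strength", "0.05"), Ok(0.05));
assert!(f64::parse_value("bayes.min-prob-strength", "5%").is_err());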
impl ParseValue for u16 {
fn parse_value(key: impl AsKey, value: &str) -> super::Result<Self> {
value.parse().map_err(|_| {

View file

@ -30,6 +30,7 @@ pub mod config;
pub mod ipc;
pub mod listener;
pub mod map;
pub mod suffixlist;
use opentelemetry::{
sdk::{

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use ahash::AHashSet;
#[derive(Debug, Clone, Default)]
pub struct PublicSuffix {
pub suffixes: AHashSet<String>,
pub exceptions: AHashSet<String>,
pub wildcards: Vec<String>,
}
impl PublicSuffix {
pub fn contains(&self, suffix: &str) -> bool {
self.suffixes.contains(suffix)
|| (!self.exceptions.contains(suffix)
&& self.wildcards.iter().any(|w| suffix.ends_with(w)))
}
}
impl From<&str> for PublicSuffix {
fn from(list: &str) -> Self {
let mut ps = PublicSuffix::default();
for line in list.lines() {
let line = line.trim().to_lowercase();
if !line.starts_with("//") {
if let Some(domain) = line.strip_prefix('*') {
ps.wildcards.push(domain.to_string());
} else if let Some(domain) = line.strip_prefix('!') {
ps.exceptions.insert(domain.to_string());
} else {
ps.suffixes.insert(line.to_string());
}
}
}
ps.suffixes.insert("onion".to_string());
ps
}
}
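The lookup semantics in one example (hypothetical list entries; the real data is the public suffix list downloaded in resolver.rs):

// Sketch: exact suffixes, "*" wildcards and "!" exceptions, plus the
// hardcoded "onion" entry appended by From<&str>.
let ps = PublicSuffix::from("// comments are skipped\ncom\n*.uk\n!city.uk");
assert!(ps.contains("com"));      // exact match
assert!(ps.contains("co.uk"));    // ends with the ".uk" wildcard remainder
assert!(!ps.contains("city.uk")); // exception wins over the wildcard
assert!(ps.contains("onion"));    // always present for Tor addresses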

View file

@ -1,6 +1,5 @@
# Mailing list scores
let "ml_score" "count(header.List-Id:List-Archive:List-Owner:List-Help:List-Post:X-Loop:List-Subscribe:List-Unsubscribe[*].exists) * 0.125";
eval "print('ml_score: ' + ml_score)";
if eval "ml_score < 1" {
if eval "header.List-Id.exists" {
let "ml_score" "ml_score + 0.50";