Spam train/classify REST API endpoints

This commit is contained in:
mdecimus 2024-12-29 16:39:16 +01:00
parent a5bec187aa
commit 4a7524cafe
12 changed files with 498 additions and 232 deletions

View file

@ -116,7 +116,7 @@ impl Core {
} else {
config.new_parse_error(
"storage.lookup",
format!("Lookup store {id:?} not found"),
format!("In-memory store {id:?} not found"),
);
None
}

View file

@ -325,7 +325,7 @@ impl BootManager {
}
}
// Parse lookup stores
// Parse in-memory stores
stores.parse_in_memory(&mut config).await;
// Parse settings

View file

@ -73,7 +73,7 @@ impl Permission {
Permission::BlobFetch => "Retrieve arbitrary blobs",
Permission::PurgeBlobStore => "Purge the blob storage",
Permission::PurgeDataStore => "Purge the data storage",
Permission::PurgeInMemoryStore => "Purge the lookup storage",
Permission::PurgeInMemoryStore => "Purge the in-memory storage",
Permission::PurgeAccount => "Purge user accounts",
Permission::FtsReindex => "Rebuild the full-text search index",
Permission::Undelete => "Restore deleted items",

View file

@ -14,7 +14,7 @@ pub mod queue;
pub mod reload;
pub mod report;
pub mod settings;
pub mod sieve;
pub mod spam;
pub mod stores;
pub mod troubleshoot;
@ -35,7 +35,7 @@ use reload::ManageReload;
use report::ManageReports;
use serde::Serialize;
use settings::ManageSettings;
use sieve::SieveHandler;
use spam::ManageSpamHandler;
use store::write::now;
use stores::ManageStore;
use troubleshoot::TroubleshootApi;
@ -117,7 +117,10 @@ impl ManagementApi for Server {
"logs" if req.method() == Method::GET => {
self.handle_view_logs(req, &access_token).await
}
"sieve" => self.handle_run_sieve(req, path, body, &access_token).await,
"spam-filter" => {
self.handle_manage_spam(req, path, body, session, &access_token)
.await
}
"restart" if req.method() == Method::GET => {
// Validate the access token
access_token.assert_has_permission(Permission::Restart)?;

View file

@ -1,134 +0,0 @@
/*
* SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::time::SystemTime;
use common::{auth::AccessToken, scripts::ScriptModification, IntoString, Server};
use directory::Permission;
use hyper::Method;
use serde_json::json;
use sieve::{runtime::Variable, Envelope};
use smtp::scripts::{event_loop::RunScript, ScriptParameters, ScriptResult};
use std::future::Future;
use utils::url_params::UrlParams;
use crate::api::{http::ToHttpResponse, HttpRequest, HttpResponse, JsonResponse};
#[derive(Debug, serde::Serialize)]
#[serde(tag = "action")]
#[serde(rename_all = "lowercase")]
pub enum Response {
Accept {
modifications: Vec<ScriptModification>,
},
Replace {
message: String,
modifications: Vec<ScriptModification>,
},
Reject {
reason: String,
},
Discard,
}
pub trait SieveHandler: Sync + Send {
fn handle_run_sieve(
&self,
req: &HttpRequest,
path: Vec<&str>,
body: Option<Vec<u8>>,
access_token: &AccessToken,
) -> impl Future<Output = trc::Result<HttpResponse>> + Send;
}
impl SieveHandler for Server {
async fn handle_run_sieve(
&self,
req: &HttpRequest,
path: Vec<&str>,
_body: Option<Vec<u8>>,
access_token: &AccessToken,
) -> trc::Result<HttpResponse> {
// Validate the access token
access_token.assert_has_permission(Permission::SpamFilterTrain)?;
let (script, script_id) = match (
path.get(1).and_then(|name| {
self.core
.sieve
.trusted_scripts
.get(*name)
.map(|s| (s.clone(), name.to_string()))
}),
req.method(),
) {
(Some(script), &Method::POST) => script,
_ => {
return Err(trc::ResourceEvent::NotFound.into_err());
}
};
let mut params = ScriptParameters::new()
.set_variable(
"now",
SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.map_or(0, |d| d.as_secs()),
)
.set_variable("test", true);
let mut envelope_to = Vec::new();
for (key, value) in UrlParams::new(req.uri().query()).into_inner() {
if key.starts_with("env_to") {
envelope_to.push(Variable::from(value.to_lowercase()));
continue;
}
let env = match key.as_ref() {
"env_from" => Envelope::From,
"env_orcpt" => Envelope::Orcpt,
"env_ret" => Envelope::Ret,
"env_notify" => Envelope::Notify,
"env_id" => Envelope::Envid,
"env_bym" => Envelope::ByMode,
"env_byt" => Envelope::ByTrace,
"env_byta" => Envelope::ByTimeAbsolute,
"env_bytr" => Envelope::ByTimeRelative,
_ => {
params = params.set_variable(key.into_owned(), value.into_owned());
continue;
}
};
params = params.set_envelope(env, value);
}
if !envelope_to.is_empty() {
params = params.set_envelope(Envelope::To, Variable::from(envelope_to));
}
// Run script
let result = match self
.run_script(script_id, script, params.with_access_token(access_token))
.await
{
ScriptResult::Accept { modifications } => Response::Accept { modifications },
ScriptResult::Replace {
message,
modifications,
} => Response::Replace {
message: message.into_string(),
modifications,
},
ScriptResult::Reject(reason) => Response::Reject { reason },
ScriptResult::Discard => Response::Discard,
};
Ok(JsonResponse::new(json!({
"data": result,
}))
.into_http_response())
}
}

View file

@ -0,0 +1,254 @@
/*
* SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::{net::IpAddr, sync::Arc};
use common::{
auth::AccessToken, config::spamfilter::SpamFilterAction, scripts::ScriptModification, Server,
};
use directory::{
backend::internal::manage::{self, ManageDirectory},
Permission,
};
use hyper::Method;
use mail_auth::{ArcOutput, DkimOutput, IprevOutput};
use mail_parser::{Message, MessageParser};
use serde::{Deserialize, Serialize};
use serde_json::json;
use spam_filter::{
analysis::{init::SpamFilterInit, score::SpamFilterAnalyzeScore},
modules::bayes::BayesClassifier,
SpamFilterInput,
};
use std::future::Future;
use store::ahash::AHashMap;
use crate::api::{
http::{HttpSessionData, ToHttpResponse},
HttpRequest, HttpResponse, JsonResponse,
};
use super::{
decode_path_element,
troubleshoot::{AuthResult, DmarcPolicy},
};
#[derive(Debug, serde::Serialize)]
#[serde(tag = "action")]
#[serde(rename_all = "lowercase")]
pub enum Response {
Accept {
modifications: Vec<ScriptModification>,
},
Replace {
message: String,
modifications: Vec<ScriptModification>,
},
Reject {
reason: String,
},
Discard,
}
pub trait ManageSpamHandler: Sync + Send {
fn handle_manage_spam(
&self,
req: &HttpRequest,
path: Vec<&str>,
body: Option<Vec<u8>>,
session: &HttpSessionData,
access_token: &AccessToken,
) -> impl Future<Output = trc::Result<HttpResponse>> + Send;
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SpamClassifyRequest {
pub message: String,
// Sender authentication
pub arc_result: AuthResult,
pub spf_ehlo_result: AuthResult,
pub spf_mail_from_result: AuthResult,
pub dkim_result: AuthResult,
pub dmarc_result: AuthResult,
pub dmarc_policy: DmarcPolicy,
pub iprev_result: AuthResult,
// Session details
pub remote_ip: IpAddr,
#[serde(default)]
pub remote_ip_ptr: Option<String>,
#[serde(default)]
pub ehlo_domain: Option<String>,
#[serde(default)]
pub authenticated_as: Option<String>,
#[serde(default)]
pub asn: Option<u32>,
#[serde(default)]
pub country: Option<String>,
// TLS
#[serde(default)]
pub is_tls: bool,
// Envelope
pub env_from: String,
pub env_from_flags: u64,
pub env_rcpt_to: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SpamClassifyResponse {
pub score: f64,
pub tags: AHashMap<String, SpamFilterDisposition<f64>>,
pub disposition: SpamFilterDisposition<String>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[serde(tag = "action")]
pub enum SpamFilterDisposition<T> {
Allow { value: T },
Discard,
Reject,
}
impl ManageSpamHandler for Server {
async fn handle_manage_spam(
&self,
req: &HttpRequest,
path: Vec<&str>,
body: Option<Vec<u8>>,
session: &HttpSessionData,
access_token: &AccessToken,
) -> trc::Result<HttpResponse> {
// Validate the access token
access_token.assert_has_permission(Permission::SpamFilterTrain)?;
match (path.get(1).copied(), path.get(2).copied(), req.method()) {
(Some("train"), Some(class @ ("ham" | "spam")), &Method::POST) => {
let message = parse_message_or_err(body.as_deref().unwrap_or_default())?;
let input = if let Some(account) = path.get(3).copied() {
let account_id = self
.store()
.get_principal_id(decode_path_element(account).as_ref())
.await?
.ok_or_else(|| trc::ManageEvent::NotFound.into_err())?;
SpamFilterInput::from_account_message(&message, account_id, session.session_id)
} else {
SpamFilterInput::from_message(&message, session.session_id)
};
self.bayes_train(&self.spam_filter_init(input), class == "spam", true)
.await?;
Ok(JsonResponse::new(json!({
"data": (),
}))
.into_http_response())
}
(Some("classify"), _, &Method::POST) => {
// Parse request
let request = serde_json::from_slice::<SpamClassifyRequest>(
body.as_deref().unwrap_or_default(),
)
.map_err(|err| {
trc::EventType::Resource(trc::ResourceEvent::BadParameters).from_json_error(err)
})?;
// Built classifier input
let message = parse_message_or_err(request.message.as_bytes())?;
let arc_result = ArcOutput::default().with_result(request.arc_result.into());
let spf_ehlo_result = request.spf_ehlo_result.into();
let spf_mail_from_result = request.spf_mail_from_result.into();
let dkim_result = vec![match request.dkim_result {
AuthResult::Pass => DkimOutput::pass(),
AuthResult::Fail { details } => {
DkimOutput::fail(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::Neutral { details } => {
DkimOutput::neutral(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::TempError { details } => {
DkimOutput::temp_err(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::PermError { details } => {
DkimOutput::perm_err(mail_auth::Error::Io(details.unwrap_or_default()))
}
_ => DkimOutput::neutral(mail_auth::Error::ParseError),
}];
let dmarc_result = request.dmarc_result.into();
let dmarc_policy = request.dmarc_policy.into();
let iprev_result = IprevOutput {
result: request.iprev_result.into(),
ptr: request.remote_ip_ptr.map(|ptr| Arc::new(vec![ptr])),
};
let input = SpamFilterInput {
message: &message,
span_id: session.session_id,
arc_result: Some(&arc_result),
spf_ehlo_result: Some(&spf_ehlo_result),
spf_mail_from_result: Some(&spf_mail_from_result),
dkim_result: dkim_result.as_slice(),
dmarc_result: Some(&dmarc_result),
dmarc_policy: Some(&dmarc_policy),
iprev_result: Some(&iprev_result),
remote_ip: request.remote_ip,
ehlo_domain: request.ehlo_domain.as_deref(),
authenticated_as: request.authenticated_as.as_deref(),
asn: request.asn,
country: request.country.as_deref(),
is_tls: request.is_tls,
env_from: &request.env_from,
env_from_flags: request.env_from_flags,
env_rcpt_to: request.env_rcpt_to.iter().map(String::as_str).collect(),
account_id: None,
is_test: true,
};
// Classify
let mut ctx = self.spam_filter_init(input);
let result = self.spam_filter_classify(&mut ctx).await;
// Build response
let mut response = SpamClassifyResponse {
score: ctx.result.score,
tags: AHashMap::with_capacity(ctx.result.tags.len()),
disposition: match result {
SpamFilterAction::Allow(value) => SpamFilterDisposition::Allow { value },
SpamFilterAction::Discard => SpamFilterDisposition::Discard,
SpamFilterAction::Reject => SpamFilterDisposition::Reject,
},
};
for tag in ctx.result.tags {
let disposition = match self.core.spam.lists.scores.get(&tag) {
Some(SpamFilterAction::Allow(score)) => {
SpamFilterDisposition::Allow { value: *score }
}
Some(SpamFilterAction::Discard) => SpamFilterDisposition::Discard,
Some(SpamFilterAction::Reject) => SpamFilterDisposition::Reject,
None => SpamFilterDisposition::Allow { value: 0.0 },
};
response.tags.insert(tag, disposition);
}
Ok(JsonResponse::new(json!({
"data": response,
}))
.into_http_response())
}
_ => Err(trc::ResourceEvent::NotFound.into_err()),
}
}
}
fn parse_message_or_err(bytes: &[u8]) -> trc::Result<Message<'_>> {
MessageParser::new()
.parse(bytes)
.filter(|m| m.root_part().headers().iter().any(|h| !h.name.is_other()))
.ok_or_else(|| manage::error("Failed to parse message.", None::<u64>))
}

View file

@ -1058,6 +1058,28 @@ impl From<&SpfOutput> for AuthResult {
}
}
impl From<AuthResult> for SpfOutput {
fn from(value: AuthResult) -> Self {
match value {
AuthResult::Pass => SpfOutput::new(String::new()).with_result(SpfResult::Pass),
AuthResult::Fail { .. } => SpfOutput::new(String::new()).with_result(SpfResult::Fail),
AuthResult::SoftFail { .. } => {
SpfOutput::new(String::new()).with_result(SpfResult::SoftFail)
}
AuthResult::Neutral { .. } => {
SpfOutput::new(String::new()).with_result(SpfResult::Neutral)
}
AuthResult::TempError { .. } => {
SpfOutput::new(String::new()).with_result(SpfResult::TempError)
}
AuthResult::PermError { .. } => {
SpfOutput::new(String::new()).with_result(SpfResult::PermError)
}
AuthResult::None => SpfOutput::new(String::new()).with_result(SpfResult::None),
}
}
}
impl From<&IprevOutput> for AuthResult {
fn from(value: &IprevOutput) -> Self {
match &value.result {
@ -1076,6 +1098,25 @@ impl From<&IprevOutput> for AuthResult {
}
}
impl From<AuthResult> for IprevResult {
fn from(value: AuthResult) -> Self {
match value {
AuthResult::Pass => IprevResult::Pass,
AuthResult::Fail { details } => {
IprevResult::Fail(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::TempError { details } => {
IprevResult::TempError(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::PermError { details } => {
IprevResult::PermError(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::None => IprevResult::None,
_ => IprevResult::None,
}
}
}
impl From<&DkimResult> for AuthResult {
fn from(value: &DkimResult) -> Self {
match value {
@ -1097,6 +1138,27 @@ impl From<&DkimResult> for AuthResult {
}
}
impl From<AuthResult> for DkimResult {
fn from(value: AuthResult) -> Self {
match value {
AuthResult::Pass => DkimResult::Pass,
AuthResult::Neutral { details } => {
DkimResult::Neutral(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::Fail { details } => {
DkimResult::Fail(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::PermError { details } => {
DkimResult::PermError(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::TempError { details } => {
DkimResult::TempError(mail_auth::Error::Io(details.unwrap_or_default()))
}
_ => DkimResult::None,
}
}
}
impl From<&DmarcResult> for AuthResult {
fn from(value: &DmarcResult) -> Self {
match value {
@ -1115,6 +1177,25 @@ impl From<&DmarcResult> for AuthResult {
}
}
impl From<AuthResult> for DmarcResult {
fn from(value: AuthResult) -> Self {
match value {
AuthResult::Pass => DmarcResult::Pass,
AuthResult::Fail { details } => {
DmarcResult::Fail(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::TempError { details } => {
DmarcResult::TempError(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::PermError { details } => {
DmarcResult::PermError(mail_auth::Error::Io(details.unwrap_or_default()))
}
AuthResult::None => DmarcResult::None,
_ => DmarcResult::None,
}
}
}
impl From<&dmarc::Policy> for DmarcPolicy {
fn from(value: &dmarc::Policy) -> Self {
match value {
@ -1125,3 +1206,14 @@ impl From<&dmarc::Policy> for DmarcPolicy {
}
}
}
impl From<DmarcPolicy> for dmarc::Policy {
fn from(value: DmarcPolicy) -> Self {
match value {
DmarcPolicy::None => dmarc::Policy::None,
DmarcPolicy::Quarantine => dmarc::Policy::Quarantine,
DmarcPolicy::Reject => dmarc::Policy::Reject,
DmarcPolicy::Unspecified => dmarc::Policy::Unspecified,
}
}
}

View file

@ -728,7 +728,7 @@ impl Purge for Server {
.ctx(trc::Key::Key, prefix));
}
} else if let Err(err) = store.purge_in_memory_store().await {
trc::error!(err.details("Failed to purge lookup store"));
trc::error!(err.details("Failed to purge in-memory store"));
}
}
PurgeType::Account(account_id) => {

View file

@ -9,22 +9,12 @@ use mail_auth::{dmarc::Policy, ArcOutput, DkimOutput, DmarcResult};
use mail_parser::Message;
use spam_filter::{
analysis::{
bayes::SpamFilterAnalyzeBayes, date::SpamFilterAnalyzeDate, dmarc::SpamFilterAnalyzeDmarc,
domain::SpamFilterAnalyzeDomain, ehlo::SpamFilterAnalyzeEhlo, from::SpamFilterAnalyzeFrom,
headers::SpamFilterAnalyzeHeaders, html::SpamFilterAnalyzeHtml, init::SpamFilterInit,
ip::SpamFilterAnalyzeIp, messageid::SpamFilterAnalyzeMid, mime::SpamFilterAnalyzeMime,
pyzor::SpamFilterAnalyzePyzor, received::SpamFilterAnalyzeReceived,
recipient::SpamFilterAnalyzeRecipient, replyto::SpamFilterAnalyzeReplyTo,
reputation::SpamFilterAnalyzeReputation, rules::SpamFilterAnalyzeRules,
score::SpamFilterAnalyzeScore, subject::SpamFilterAnalyzeSubject,
trusted_reply::SpamFilterAnalyzeTrustedReply, url::SpamFilterAnalyzeUrl,
init::SpamFilterInit, score::SpamFilterAnalyzeScore,
trusted_reply::SpamFilterAnalyzeTrustedReply,
},
SpamFilterInput,
};
#[cfg(feature = "enterprise")]
use spam_filter::analysis::llm::SpamFilterAnalyzeLlm;
use crate::core::Session;
impl<T: SessionStream> Session<T> {
@ -46,82 +36,8 @@ impl<T: SessionStream> Session<T> {
));
if !self.is_authenticated() {
// IP address analysis
server.spam_filter_analyze_ip(&mut ctx).await;
// DMARC/SPF/DKIM/ARC analysis
server.spam_filter_analyze_dmarc(&mut ctx).await;
// EHLO hostname analysis
server.spam_filter_analyze_ehlo(&mut ctx).await;
// Generic header analysis
server.spam_filter_analyze_headers(&mut ctx).await;
// Received headers analysis
server.spam_filter_analyze_received(&mut ctx).await;
// Message-ID analysis
server.spam_filter_analyze_message_id(&mut ctx).await;
// Date header analysis
server.spam_filter_analyze_date(&mut ctx).await;
// Subject analysis
server.spam_filter_analyze_subject(&mut ctx).await;
// From and Envelope From analysis
server.spam_filter_analyze_from(&mut ctx).await;
// Reply-To analysis
server.spam_filter_analyze_reply_to(&mut ctx).await;
// Recipient analysis
server.spam_filter_analyze_recipient(&mut ctx).await;
// E-mail and domain analysis
server.spam_filter_analyze_domain(&mut ctx).await;
// URL analysis
server.spam_filter_analyze_url(&mut ctx).await;
// MIME part analysis
server.spam_filter_analyze_mime(&mut ctx).await;
// HTML content analysis
server.spam_filter_analyze_html(&mut ctx).await;
// LLM classification
#[cfg(feature = "enterprise")]
server.spam_filter_analyze_llm(&mut ctx).await;
// Trusted reply analysis
server.spam_filter_analyze_reply_in(&mut ctx).await;
// Spam trap
server.spam_filter_analyze_spam_trap(&mut ctx).await;
// Pyzor checks
server.spam_filter_analyze_pyzor(&mut ctx).await;
// Bayes classification
server.spam_filter_analyze_bayes_classify(&mut ctx).await;
// User-defined rules
server.spam_filter_analyze_rules(&mut ctx).await;
// Calculate score
match server.spam_filter_score(&mut ctx).await {
SpamFilterAction::Allow(_) => (),
SpamFilterAction::Discard => return SpamFilterAction::Discard,
SpamFilterAction::Reject => return SpamFilterAction::Reject,
}
// Reputation tracking and adjust score
server.spam_filter_analyze_reputation(&mut ctx).await;
// Final score calculation
server.spam_filter_finalize(&mut ctx).await
// Spam classification
server.spam_filter_classify(&mut ctx).await
} else {
// Trusted reply tracking
server.spam_filter_analyze_reply_out(&mut ctx).await;

View file

@ -7,7 +7,24 @@
use common::{config::spamfilter::SpamFilterAction, Server};
use std::{fmt::Write, future::Future, vec};
use crate::{modules::bayes::BayesClassifier, SpamFilterContext};
use crate::{
analysis::{
bayes::SpamFilterAnalyzeBayes, date::SpamFilterAnalyzeDate, dmarc::SpamFilterAnalyzeDmarc,
domain::SpamFilterAnalyzeDomain, ehlo::SpamFilterAnalyzeEhlo, from::SpamFilterAnalyzeFrom,
headers::SpamFilterAnalyzeHeaders, html::SpamFilterAnalyzeHtml, ip::SpamFilterAnalyzeIp,
messageid::SpamFilterAnalyzeMid, mime::SpamFilterAnalyzeMime,
pyzor::SpamFilterAnalyzePyzor, received::SpamFilterAnalyzeReceived,
recipient::SpamFilterAnalyzeRecipient, replyto::SpamFilterAnalyzeReplyTo,
reputation::SpamFilterAnalyzeReputation, rules::SpamFilterAnalyzeRules,
subject::SpamFilterAnalyzeSubject, trusted_reply::SpamFilterAnalyzeTrustedReply,
url::SpamFilterAnalyzeUrl,
},
modules::bayes::BayesClassifier,
SpamFilterContext,
};
#[cfg(feature = "enterprise")]
use crate::analysis::llm::SpamFilterAnalyzeLlm;
pub trait SpamFilterAnalyzeScore: Sync + Send {
fn spam_filter_score(
@ -19,6 +36,11 @@ pub trait SpamFilterAnalyzeScore: Sync + Send {
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = SpamFilterAction<String>> + Send;
fn spam_filter_classify(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = SpamFilterAction<String>> + Send;
}
impl SpamFilterAnalyzeScore for Server {
@ -72,7 +94,13 @@ impl SpamFilterAnalyzeScore for Server {
ctx: &mut SpamFilterContext<'_>,
) -> SpamFilterAction<String> {
// Train Bayes classifier
if let Some(config) = self.core.spam.bayes.as_ref().filter(|c| c.auto_learn) {
if let Some(config) = self
.core
.spam
.bayes
.as_ref()
.filter(|c| c.auto_learn && !ctx.input.is_test)
{
let was_classified =
ctx.result.has_tag("BAYES_SPAM") || ctx.result.has_tag("BAYES_HAM");
if ctx.result.has_tag("SPAM_TRAP")
@ -112,4 +140,86 @@ impl SpamFilterAnalyzeScore for Server {
SpamFilterAction::Allow(header)
}
}
async fn spam_filter_classify(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> SpamFilterAction<String> {
// IP address analysis
self.spam_filter_analyze_ip(ctx).await;
// DMARC/SPF/DKIM/ARC analysis
self.spam_filter_analyze_dmarc(ctx).await;
// EHLO hostname analysis
self.spam_filter_analyze_ehlo(ctx).await;
// Generic header analysis
self.spam_filter_analyze_headers(ctx).await;
// Received headers analysis
self.spam_filter_analyze_received(ctx).await;
// Message-ID analysis
self.spam_filter_analyze_message_id(ctx).await;
// Date header analysis
self.spam_filter_analyze_date(ctx).await;
// Subject analysis
self.spam_filter_analyze_subject(ctx).await;
// From and Envelope From analysis
self.spam_filter_analyze_from(ctx).await;
// Reply-To analysis
self.spam_filter_analyze_reply_to(ctx).await;
// Recipient analysis
self.spam_filter_analyze_recipient(ctx).await;
// E-mail and domain analysis
self.spam_filter_analyze_domain(ctx).await;
// URL analysis
self.spam_filter_analyze_url(ctx).await;
// MIME part analysis
self.spam_filter_analyze_mime(ctx).await;
// HTML content analysis
self.spam_filter_analyze_html(ctx).await;
// LLM classification
#[cfg(feature = "enterprise")]
self.spam_filter_analyze_llm(ctx).await;
// Trusted reply analysis
self.spam_filter_analyze_reply_in(ctx).await;
// Spam trap
self.spam_filter_analyze_spam_trap(ctx).await;
// Pyzor checks
self.spam_filter_analyze_pyzor(ctx).await;
// Bayes classification
self.spam_filter_analyze_bayes_classify(ctx).await;
// User-defined rules
self.spam_filter_analyze_rules(ctx).await;
// Calculate score
match self.spam_filter_score(ctx).await {
SpamFilterAction::Allow(_) => (),
SpamFilterAction::Discard => return SpamFilterAction::Discard,
SpamFilterAction::Reject => return SpamFilterAction::Reject,
}
// Reputation tracking and adjust score
self.spam_filter_analyze_reputation(ctx).await;
// Final score calculation
self.spam_filter_finalize(ctx).await
}
}

View file

@ -129,6 +129,31 @@ pub struct Recipient {
}
impl<'x> SpamFilterInput<'x> {
pub fn from_message(message: &'x Message<'x>, span_id: u64) -> Self {
Self {
message,
span_id,
arc_result: None,
spf_ehlo_result: None,
spf_mail_from_result: None,
dkim_result: &[],
dmarc_result: None,
dmarc_policy: None,
iprev_result: None,
remote_ip: IpAddr::V4(Ipv4Addr::LOCALHOST),
ehlo_domain: None,
authenticated_as: None,
asn: None,
country: None,
is_tls: true,
env_from: "",
env_from_flags: 0,
env_rcpt_to: vec![],
account_id: None,
is_test: false,
}
}
pub fn from_account_message(message: &'x Message<'x>, account_id: u32, span_id: u64) -> Self {
Self {
message,

View file

@ -29,7 +29,7 @@ pub async fn lookup_tests() {
for (store_id, store) in stores.in_memory_stores {
let is_mysql = store_id == "mysql";
println!("Testing lookup store {}...", store_id);
println!("Testing in-memory store {}...", store_id);
if let InMemoryStore::Store(store) = &store {
store.destroy().await;
} else {