diff --git a/crates/common/src/scripts/mod.rs b/crates/common/src/scripts/mod.rs
index 2aec6814..01d16f9b 100644
--- a/crates/common/src/scripts/mod.rs
+++ b/crates/common/src/scripts/mod.rs
@@ -8,7 +8,7 @@ use crate::IntoString;
pub mod functions;
pub mod plugins;
-#[derive(Debug)]
+#[derive(Debug, serde::Serialize)]
pub enum ScriptModification {
SetEnvelope {
name: Envelope,
diff --git a/crates/common/src/scripts/plugins/bayes.rs b/crates/common/src/scripts/plugins/bayes.rs
index d96285d4..3ad583a9 100644
--- a/crates/common/src/scripts/plugins/bayes.rs
+++ b/crates/common/src/scripts/plugins/bayes.rs
@@ -79,9 +79,15 @@ async fn train(ctx: PluginContext<'_>, is_train: bool) -> Variable {
let text = ctx.arguments[1].to_string();
let is_spam = ctx.arguments[2].to_bool();
if text.is_empty() {
+ tracing::debug!(
+ parent: span,
+ context = "sieve:bayes_train",
+ event = "failed",
+ reason = "Empty message",
+ );
return false.into();
}
// Train the model
let mut model = BayesModel::default();
model.train(
@@ -92,6 +98,12 @@ async fn train(ctx: PluginContext<'_>, is_train: bool) -> Variable {
is_spam,
);
if model.weights.is_empty() {
+ tracing::debug!(
+ parent: span,
+ context = "sieve:bayes_train",
+ event = "failed",
+ reason = "No weights found",
+ );
return false.into();
}
diff --git a/crates/jmap/src/api/management/mod.rs b/crates/jmap/src/api/management/mod.rs
index 463b7507..684b5ea0 100644
--- a/crates/jmap/src/api/management/mod.rs
+++ b/crates/jmap/src/api/management/mod.rs
@@ -29,6 +29,7 @@ pub mod queue;
pub mod reload;
pub mod report;
pub mod settings;
+pub mod sieve;
pub mod stores;
use std::{borrow::Cow, sync::Arc};
@@ -89,6 +90,7 @@ impl JMAP {
"logs" if is_superuser && req.method() == Method::GET => {
self.handle_view_logs(req).await
}
+ "sieve" if is_superuser => self.handle_run_sieve(req, path, body).await,
"restart" if is_superuser && req.method() == Method::GET => {
ManagementApiError::Unsupported {
details: "Restart is not yet supported".into(),
diff --git a/crates/jmap/src/api/management/sieve.rs b/crates/jmap/src/api/management/sieve.rs
new file mode 100644
index 00000000..9e6f0c60
--- /dev/null
+++ b/crates/jmap/src/api/management/sieve.rs
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ *
+ * This file is part of Stalwart Mail Server.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ * in the LICENSE file at the top-level directory of this distribution.
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can be released from the requirements of the AGPLv3 license by
+ * purchasing a commercial license. Please contact licensing@stalw.art
+ * for more details.
+*/
+
+use std::time::SystemTime;
+
+use hyper::Method;
+use jmap_proto::error::request::RequestError;
+use serde_json::json;
+use sieve::{runtime::Variable, Envelope};
+use smtp::scripts::ScriptParameters;
+use utils::url_params::UrlParams;
+
+use crate::{
+ api::{http::ToHttpResponse, HttpRequest, HttpResponse, JsonResponse},
+ JMAP,
+};
+
+impl JMAP {
+    /// Runs a trusted Sieve script on demand and returns its outcome as JSON.
+    ///
+    /// `path[1]` names the script and `body` holds the raw message (empty if absent).
+    /// `env_*` query parameters populate the envelope; any other parameter becomes
+    /// a script variable. `now` (Unix seconds) and `test = true` are pre-set.
+    ///
+    /// Unknown scripts and non-`POST` requests get a "not found" response.
+    pub async fn handle_run_sieve(
+        &self,
+        req: &HttpRequest,
+        path: Vec<&str>,
+        body: Option<Vec<u8>>,
+    ) -> HttpResponse {
+        let script = path
+            .get(1)
+            .and_then(|name| self.core.sieve.scripts.get(*name));
+        let script = match script {
+            Some(script) if req.method() == Method::POST => script.clone(),
+            _ => return RequestError::not_found().into_http_response(),
+        };
+
+        // A clock set before the Unix epoch falls back to 0 instead of failing.
+        let now = SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .map_or(0, |d| d.as_secs());
+        let mut params = ScriptParameters::new()
+            .set_variable("now", now)
+            .set_variable("test", true)
+            .with_message(body.as_deref().unwrap_or_default());
+
+        // NOTE(review): params come from a map, so at most one `env_to` can arrive here.
+        let mut envelope_to = Vec::new();
+        for (key, value) in UrlParams::new(req.uri().query()).into_inner() {
+            let env = match key.as_ref() {
+                "env_to" => {
+                    envelope_to.push(Variable::from(value.to_lowercase()));
+                    continue;
+                }
+                "env_from" => Envelope::From,
+                "env_orcpt" => Envelope::Orcpt,
+                "env_ret" => Envelope::Ret,
+                "env_notify" => Envelope::Notify,
+                "env_id" => Envelope::Envid,
+                "env_bym" => Envelope::ByMode,
+                "env_byt" => Envelope::ByTrace,
+                "env_byta" => Envelope::ByTimeAbsolute,
+                "env_bytr" => Envelope::ByTimeRelative,
+                _ => {
+                    params = params.set_variable(key.into_owned(), value.into_owned());
+                    continue;
+                }
+            };
+
+            params = params.set_envelope(env, value);
+        }
+
+        if !envelope_to.is_empty() {
+            params = params.set_envelope(Envelope::To, Variable::from(envelope_to));
+        }
+
+        // Run script
+        let result = self
+            .smtp
+            .run_script(script, params, tracing::debug_span!("sieve_manual_run"))
+            .await;
+
+        JsonResponse::new(json!({ "data": result })).into_http_response()
+    }
+}
diff --git a/crates/smtp/src/scripts/event_loop.rs b/crates/smtp/src/scripts/event_loop.rs
index 218b612b..2f680727 100644
--- a/crates/smtp/src/scripts/event_loop.rs
+++ b/crates/smtp/src/scripts/event_loop.rs
@@ -50,7 +50,7 @@ impl SMTP {
.core
.sieve
.trusted_runtime
- .filter(params.message.as_ref().map_or(b"", |m| &m[..]))
+ .filter(params.message.unwrap_or_default())
.with_vars_env(params.variables)
.with_envelope_list(params.envelope)
.with_user_address(¶ms.from_addr)
diff --git a/crates/smtp/src/scripts/mod.rs b/crates/smtp/src/scripts/mod.rs
index 63d9598a..b744a34a 100644
--- a/crates/smtp/src/scripts/mod.rs
+++ b/crates/smtp/src/scripts/mod.rs
@@ -31,7 +31,7 @@ pub mod envelope;
pub mod event_loop;
pub mod exec;
-#[derive(Debug)]
+#[derive(Debug, serde::Serialize)]
pub enum ScriptResult {
Accept {
modifications: Vec,
@@ -112,6 +112,13 @@ impl<'x> ScriptParameters<'x> {
self
}
+    /// Appends an envelope entry for the script run, converting `value` into a
+    /// [`Variable`]. Despite the name, entries added earlier are kept.
+    pub fn set_envelope(mut self, envelope: Envelope, value: impl Into<Variable>) -> Self {
+        self.envelope.push((envelope, value.into()));
+        self
+    }
+
#[cfg(feature = "test_mode")]
pub fn with_expected_variables(
mut self,
diff --git a/crates/utils/src/url_params.rs b/crates/utils/src/url_params.rs
index e9f78101..6588914d 100644
--- a/crates/utils/src/url_params.rs
+++ b/crates/utils/src/url_params.rs
@@ -55,4 +55,9 @@ impl<'x> UrlParams<'x> {
{
self.get(key).and_then(|v| v.parse().ok())
}
+
+    /// Consumes the parser and returns the underlying parameter map.
+    pub fn into_inner(self) -> HashMap<Cow<'x, str>, Cow<'x, str>> {
+        self.params
+    }
}
diff --git a/resources/config/build.py b/resources/config/build.py
index 5af133b2..7bb9ed0b 100644
--- a/resources/config/build.py
+++ b/resources/config/build.py
@@ -37,12 +37,17 @@ scripts = {
"greylist": [
"config.sieve",
"greylist.sieve"
+ ],
+ "train": [
+ "config.sieve",
+ "train.sieve"
]
}
script_names = {
"spam-filter" : "Spam Filter",
"track-replies" : "Track Replies",
- "greylist" : "Greylisting"
+ "greylist" : "Greylisting",
+ "train": "Train Bayes Classifier"
}
maps = ["spam_config.map",
@@ -69,7 +74,7 @@ def read_file(file):
return f.read() + "\n"
def build_spam_filters(scripts):
- spam_filter = "[version]\nspam-filter = \"1.0\"\n\n"
+ spam_filter = "[version]\nspam-filter = \"1.1\"\n\n"
for script_name, file_list in scripts.items():
script_content = read_and_concatenate(file_list).replace("'''", "\\'\\'\\'")
script_description = script_names[script_name]
diff --git a/resources/config/spamfilter.toml b/resources/config/spamfilter.toml
index 97706a75..e07edc7a 100644
--- a/resources/config/spamfilter.toml
+++ b/resources/config/spamfilter.toml
@@ -1,5 +1,5 @@
[version]
-spam-filter = "1.0"
+spam-filter = "1.1"
[sieve.trusted.scripts.spam-filter]
name = "Spam Filter"
@@ -17,7 +17,7 @@ let "ADD_HEADER_SPAM_RESULT" "key_get('spam-config', 'add-spam-result')";
let "AUTOLEARN_REPLIES_HAM" "key_get('spam-config', 'learn-ham-replies')";
# Whether the bayes classifier should be trained automatically
-let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable')";
+let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable') && !env.test";
# When to learn ham (score >= threshold)
let "AUTOLEARN_HAM_THRESHOLD" "key_get('spam-config', 'learn-ham-threshold')";
@@ -61,7 +61,7 @@ let "urls" "dedup(tokenize(header.subject, 'uri') + body_urls + html_body_urls)"
# Obtain thread name and subject
let "subject_lc" "to_lowercase(header.subject)";
let "subject_clean" "thread_name(header.subject)";
-let "body_and_subject" "subject_clean + text_body";
+let "body_and_subject" "subject_clean + ' ' + text_body";
# Obtain all recipients
let "recipients" "to_lowercase(header.to:cc:bcc[*].addr[*])";
@@ -2257,7 +2257,7 @@ while "i > 0" {
if eval "is_empty(token_rep)" {
# Set reputation
- eval "key_set(SPAM_DB, token_id, [score, 1], 2592000)";
+ eval "!env.test && key_set(SPAM_DB, token_id, [score, 1], 2592000)";
continue;
}
@@ -2265,7 +2265,7 @@ while "i > 0" {
let "token_score" "token_rep[0]";
let "token_count" "token_rep[1]";
let "updated_score" "(token_count + 1) * (score + 0.98 * token_score) / (0.98 * token_count + 1)";
- eval "key_set(SPAM_DB, token_id, [updated_score, token_count + 1], 2592000)";
+ eval "!env.test && key_set(SPAM_DB, token_id, [updated_score, token_count + 1], 2592000)";
# Assign weight
let "weight" "";
@@ -2343,7 +2343,7 @@ let "ADD_HEADER_SPAM_RESULT" "key_get('spam-config', 'add-spam-result')";
let "AUTOLEARN_REPLIES_HAM" "key_get('spam-config', 'learn-ham-replies')";
# Whether the bayes classifier should be trained automatically
-let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable')";
+let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable') && !env.test";
# When to learn ham (score >= threshold)
let "AUTOLEARN_HAM_THRESHOLD" "key_get('spam-config', 'learn-ham-threshold')";
@@ -2403,7 +2403,7 @@ let "ADD_HEADER_SPAM_RESULT" "key_get('spam-config', 'add-spam-result')";
let "AUTOLEARN_REPLIES_HAM" "key_get('spam-config', 'learn-ham-replies')";
# Whether the bayes classifier should be trained automatically
-let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable')";
+let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable') && !env.test";
# When to learn ham (score >= threshold)
let "AUTOLEARN_HAM_THRESHOLD" "key_get('spam-config', 'learn-ham-threshold')";
@@ -2444,6 +2444,66 @@ if eval "!key_exists(SPAM_DB, triplet)" {
'''
+[sieve.trusted.scripts.train]
+name = "Train Bayes Classifier"
+contents = '''
+
+#### Script config.sieve ####
+
+# Whether to add an X-Spam-Status header
+let "ADD_HEADER_SPAM" "key_get('spam-config', 'add-spam')";
+
+# Whether to add an X-Spam-Result header
+let "ADD_HEADER_SPAM_RESULT" "key_get('spam-config', 'add-spam-result')";
+
+# Whether message replies from authenticated users should be learned as ham
+let "AUTOLEARN_REPLIES_HAM" "key_get('spam-config', 'learn-ham-replies')";
+
+# Whether the bayes classifier should be trained automatically
+let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable') && !env.test";
+
+# When to learn ham (score >= threshold)
+let "AUTOLEARN_HAM_THRESHOLD" "key_get('spam-config', 'learn-ham-threshold')";
+
+# When to learn spam (score <= threshold)
+let "AUTOLEARN_SPAM_THRESHOLD" "key_get('spam-config', 'learn-spam-threshold')";
+
+# Keep difference for spam/ham learns for at least this value
+let "AUTOLEARN_SPAM_HAM_BALANCE" "key_get('spam-config', 'learn-balance')";
+
+# If ADD_HEADER_SPAM is enabled, mark as SPAM messages with a score above this threshold
+let "SCORE_SPAM_THRESHOLD" "key_get('spam-config', 'threshold-spam')";
+
+# Discard messages with a score above this threshold
+let "SCORE_DISCARD_THRESHOLD" "key_get('spam-config', 'threshold-discard')";
+
+# Reject messages with a score above this threshold
+let "SCORE_REJECT_THRESHOLD" "key_get('spam-config', 'threshold-reject')";
+
+# Directory name to use for local domain lookups (leave empty for default)
+let "DOMAIN_DIRECTORY" "key_get('spam-config', 'directory')";
+
+# Store to use for Bayes tokens and ids (leave empty for default)
+let "SPAM_DB" "key_get('spam-config', 'lookup')";
+
+
+#### Script train.sieve ####
+
+
+
+# Build the training text: thread name of the subject, a space, then the body text
+let "contents" "thread_name(header.subject) + ' ' + body.to_text";
+
+if eval "env.train == 'spam'" {
+ eval "bayes_train(SPAM_DB, contents, true)";
+} elsif eval "env.train == 'ham'" {
+ eval "bayes_train(SPAM_DB, contents, false)";
+} else {
+ reject "Missing variable 'train'";
+}
+
+'''
+
[lookup]
spam-config = {
diff --git a/resources/config/spamfilter/scripts/config.sieve b/resources/config/spamfilter/scripts/config.sieve
index 6499f41d..33d69816 100644
--- a/resources/config/spamfilter/scripts/config.sieve
+++ b/resources/config/spamfilter/scripts/config.sieve
@@ -8,7 +8,7 @@ let "ADD_HEADER_SPAM_RESULT" "key_get('spam-config', 'add-spam-result')";
let "AUTOLEARN_REPLIES_HAM" "key_get('spam-config', 'learn-ham-replies')";
# Whether the bayes classifier should be trained automatically
-let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable')";
+let "AUTOLEARN_ENABLE" "key_get('spam-config', 'learn-enable') && !env.test";
# When to learn ham (score >= threshold)
let "AUTOLEARN_HAM_THRESHOLD" "key_get('spam-config', 'learn-ham-threshold')";
diff --git a/resources/config/spamfilter/scripts/prelude.sieve b/resources/config/spamfilter/scripts/prelude.sieve
index c50de1ef..86367716 100644
--- a/resources/config/spamfilter/scripts/prelude.sieve
+++ b/resources/config/spamfilter/scripts/prelude.sieve
@@ -13,7 +13,7 @@ let "urls" "dedup(tokenize(header.subject, 'uri') + body_urls + html_body_urls)"
# Obtain thread name and subject
let "subject_lc" "to_lowercase(header.subject)";
let "subject_clean" "thread_name(header.subject)";
-let "body_and_subject" "subject_clean + text_body";
+let "body_and_subject" "subject_clean + ' ' + text_body";
# Obtain all recipients
let "recipients" "to_lowercase(header.to:cc:bcc[*].addr[*])";
diff --git a/resources/config/spamfilter/scripts/reputation.sieve b/resources/config/spamfilter/scripts/reputation.sieve
index 71ff4e99..0974f4e7 100644
--- a/resources/config/spamfilter/scripts/reputation.sieve
+++ b/resources/config/spamfilter/scripts/reputation.sieve
@@ -41,7 +41,7 @@ while "i > 0" {
if eval "is_empty(token_rep)" {
# Set reputation
- eval "key_set(SPAM_DB, token_id, [score, 1], 2592000)";
+ eval "!env.test && key_set(SPAM_DB, token_id, [score, 1], 2592000)";
continue;
}
@@ -49,7 +49,7 @@ while "i > 0" {
let "token_score" "token_rep[0]";
let "token_count" "token_rep[1]";
let "updated_score" "(token_count + 1) * (score + 0.98 * token_score) / (0.98 * token_count + 1)";
- eval "key_set(SPAM_DB, token_id, [updated_score, token_count + 1], 2592000)";
+ eval "!env.test && key_set(SPAM_DB, token_id, [updated_score, token_count + 1], 2592000)";
# Assign weight
let "weight" "";
diff --git a/resources/config/spamfilter/scripts/train.sieve b/resources/config/spamfilter/scripts/train.sieve
new file mode 100644
index 00000000..8cae688e
--- /dev/null
+++ b/resources/config/spamfilter/scripts/train.sieve
@@ -0,0 +1,12 @@
+# Trains the Bayes classifier with the message being processed.
+# The variable 'train' must be 'spam' or 'ham'; anything else is rejected.
+# Build the training text: thread name of the subject, a space, then the body text
+let "contents" "thread_name(header.subject) + ' ' + body.to_text";
+
+if eval "env.train == 'spam'" {
+ eval "bayes_train(SPAM_DB, contents, true)";
+} elsif eval "env.train == 'ham'" {
+ eval "bayes_train(SPAM_DB, contents, false)";
+} else {
+ reject "Missing variable 'train'";
+}