Updated spam filter rules
Some checks are pending
trivy / Check (push) Waiting to run

This commit is contained in:
mdecimus 2024-10-06 14:56:28 +02:00
parent d0ce2b1a96
commit 881d4497ce
21 changed files with 184 additions and 47 deletions

View file

@ -2,6 +2,19 @@
All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/).
## [0.10.3] - 2024-10-07
To upgrade, replace the `stalwart-mail` binary and then upgrade to the latest web-admin. Enterprise users wishing to use the new LLM-powered spam filter should also upgrade the spam filter rules.
### Added
- AI-powered Spam filtering and Sieve scripting (Enterprise feature).
### Changed
### Fixed
- S3-compatible backends: Retry on `5xx` errors.
- OIDC: Include `nonce` parameter in `id_token` response.
## [0.10.2] - 2024-10-02
To upgrade, first upgrade the web-admin and then replace the `stalwart-mail` binary. If you read these instructions too late, you can upgrade to the latest web-admin using `curl -k -u admin:yourpass https://yourserver/api/update/webadmin`.

28
Cargo.lock generated
View file

@ -1052,7 +1052,7 @@ dependencies = [
[[package]]
name = "common"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"aes-gcm-siv",
"ahash 0.8.11",
@ -1668,7 +1668,7 @@ dependencies = [
[[package]]
name = "directory"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"argon2",
@ -3010,7 +3010,7 @@ checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285"
[[package]]
name = "imap"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"common",
@ -3222,7 +3222,7 @@ dependencies = [
[[package]]
name = "jmap"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"aes",
"aes-gcm",
@ -3303,7 +3303,7 @@ dependencies = [
[[package]]
name = "jmap_proto"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"fast-float",
@ -3660,7 +3660,7 @@ dependencies = [
[[package]]
name = "mail-server"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"common",
"directory",
@ -3679,7 +3679,7 @@ dependencies = [
[[package]]
name = "managesieve"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"bincode",
@ -3957,7 +3957,7 @@ dependencies = [
[[package]]
name = "nlp"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"bincode",
@ -4511,7 +4511,7 @@ dependencies = [
[[package]]
name = "pop3"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"common",
"directory",
@ -6081,7 +6081,7 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "smtp"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"bincode",
@ -6197,7 +6197,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "stalwart-cli"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"clap",
"console",
@ -6228,7 +6228,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "store"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"arc-swap",
@ -6872,7 +6872,7 @@ dependencies = [
[[package]]
name = "trc"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"base64 0.22.1",
@ -7115,7 +7115,7 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "utils"
version = "0.10.2"
version = "0.10.3"
dependencies = [
"ahash 0.8.11",
"base64 0.22.1",

View file

@ -36,11 +36,9 @@
Key features:
- **JMAP** server:
- [JMAP Core](https://datatracker.ietf.org/doc/html/rfc8620) and [JMAP Mail](https://datatracker.ietf.org/doc/html/rfc8621) full compliance.
- [JMAP for Sieve Scripts](https://www.ietf.org/archive/id/draft-ietf-jmap-sieve-22.html) extension for managing Sieve scripts.
- [JMAP for WebSocket](https://datatracker.ietf.org/doc/html/rfc8887), [JMAP Blob Management](https://www.rfc-editor.org/rfc/rfc9404.html) and [JMAP for Quotas](https://www.rfc-editor.org/rfc/rfc9425.html) extensions.
- **IMAP4**, **POP3** and **ManageSieve** server:
- **JMAP**, **IMAP4**, **POP3** and **ManageSieve** server:
- [JMAP](https://datatracker.ietf.org/doc/html/rfc8621) server with
[Sieve Scripts](https://www.ietf.org/archive/id/draft-ietf-jmap-sieve-22.html), [WebSocket](https://datatracker.ietf.org/doc/html/rfc8887), [Blob Management](https://www.rfc-editor.org/rfc/rfc9404.html) and [Quotas](https://www.rfc-editor.org/rfc/rfc9425.html) extensions.
- [IMAP4rev2](https://datatracker.ietf.org/doc/html/rfc9051) and [IMAP4rev1](https://datatracker.ietf.org/doc/html/rfc3501) server with support for [numerous extensions](https://stalw.art/docs/development/rfcs#imap4-and-extensions).
- [POP3](https://datatracker.ietf.org/doc/html/rfc1939) server with [extensions](https://datatracker.ietf.org/doc/html/rfc2449), [STLS](https://datatracker.ietf.org/doc/html/rfc2595) and [SASL](https://datatracker.ietf.org/doc/html/rfc5034) support.
- [ManageSieve](https://datatracker.ietf.org/doc/html/rfc5804) server for managing Sieve scripts.
@ -50,8 +48,9 @@ Key features:
- Inbound throttling and filtering with granular configuration rules, sieve scripting, MTA hooks and milter integration.
- Distributed virtual queues with delayed delivery, priority delivery, quotas, routing rules and throttling support.
- Envelope rewriting and message modification.
- **Spam and Phishing** filter:
- Built-in **Spam and Phishing** filter:
- Comprehensive set of filtering **rules** on par with popular solutions.
- LLM-driven spam filtering and message analysis.
- Statistical **spam classifier** with automatic training capabilities.
- DNS Blocklists (**DNSBLs**) checking of IP addresses, domains, and hashes.
- Collaborative digest-based spam filtering with **Pyzor**.
@ -63,7 +62,6 @@ Key features:
- **Flexible and scalable**:
- Pluggable storage backends with **RocksDB**, **FoundationDB**, **PostgreSQL**, **MySQL**, **SQLite**, **S3-Compatible**, **Redis** and **ElasticSearch** support.
- **Clustering** support with node autodiscovery and partition-tolerant failure detection.
- Built-in, **OpenID**, **LDAP** or **SQL** authentication backend support.
- Full-text search available in 17 languages.
- Sieve scripting language with support for all [registered extensions](https://www.iana.org/assignments/sieve-extensions/sieve-extensions.xhtml).
- Email aliases, mailing lists, subaddressing and catch-all addresses support.
@ -73,14 +71,18 @@ Key features:
- **Secure and robust**:
- Encryption at rest with **S/MIME** or **OpenPGP**.
- Automatic TLS certificate provisioning with [ACME](https://datatracker.ietf.org/doc/html/rfc8555) using `TLS-ALPN-01`, `DNS-01` or `HTTP-01` challenges.
- OpenID Connect, OAuth 2.0 authentication with [authorization code](https://www.rfc-editor.org/rfc/rfc8628) and [device authorization](https://www.rfc-editor.org/rfc/rfc8628) flows.
- Two-factor authentication with Time-based One-Time Passwords (`2FA-TOTP`)
- Application passwords (App Passwords).
- Automated blocking of hosts that perform brute-force attacks or scans (aka **fail2ban**).
- Roles, permissions and Access Control Lists (ACLs).
- Rate limiting.
- Security audited (read the [report](https://stalw.art/blog/security-audit)).
- Memory safe (thanks to Rust).
- **Authentication and Authorization**:
- **OpenID Connect** authentication.
- OAuth 2.0 authorization with [authorization code](https://www.rfc-editor.org/rfc/rfc6749) and [device authorization](https://www.rfc-editor.org/rfc/rfc8628) flows.
- **LDAP**, **OIDC**, **SQL** or built-in authentication backend support.
- Two-factor authentication with Time-based One-Time Passwords (`2FA-TOTP`).
- Application passwords (App Passwords).
- Roles and permissions.
- Access Control Lists (ACLs).
- **Observability**:
- Logging and tracing with **OpenTelemetry**, journald, log files and console support.
- Metrics with **OpenTelemetry** and **Prometheus** integration.

View file

@ -5,7 +5,7 @@ authors = ["Stalwart Labs Ltd. <hello@stalw.art>"]
license = "AGPL-3.0-only OR LicenseRef-SEL"
repository = "https://github.com/stalwartlabs/cli"
homepage = "https://github.com/stalwartlabs/cli"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
readme = "README.md"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "common"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -7,13 +7,13 @@
use std::time::Instant;
use directory::Permission;
use sieve::{runtime::Variable, FunctionMap};
use sieve::{compiler::Number, runtime::Variable, FunctionMap};
use trc::{AiEvent, SecurityEvent};
use super::PluginContext;
pub fn register(plugin_id: u32, fnc_map: &mut FunctionMap) {
fnc_map.set_external_function("llm_prompt", plugin_id, 2);
fnc_map.set_external_function("llm_prompt", plugin_id, 3);
}
pub async fn exec(ctx: PluginContext<'_>) -> trc::Result<Variable> {
@ -29,6 +29,10 @@ pub async fn exec(ctx: PluginContext<'_>) -> trc::Result<Variable> {
if name.as_ref() == "echo-test" {
return Ok(prompt.to_string().into());
}
let temperature = ctx.arguments[2].to_number_checked().map(|n| match n {
Number::Integer(n) => (n as f64).clamp(0.0, 1.0),
Number::Float(n) => n.clamp(0.0, 1.0),
});
if let Some(ai_api) = ctx.server.core.enterprise.as_ref().and_then(|e| {
if ctx.access_token.map_or(true, |token| {
@ -54,7 +58,7 @@ pub async fn exec(ctx: PluginContext<'_>) -> trc::Result<Variable> {
}
}) {
let time = Instant::now();
match ai_api.send_request(prompt.as_ref(), None).await {
match ai_api.send_request(prompt.as_ref(), temperature).await {
Ok(response) => {
trc::event!(
Ai(AiEvent::LlmResponse),

View file

@ -1,6 +1,6 @@
[package]
name = "directory"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "imap"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "jmap_proto"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "jmap"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -7,7 +7,7 @@ homepage = "https://stalw.art"
keywords = ["imap", "jmap", "smtp", "email", "mail", "server"]
categories = ["email"]
license = "AGPL-3.0-only OR LicenseRef-SEL"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "managesieve"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "nlp"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "pop3"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -7,7 +7,7 @@ homepage = "https://stalw.art/smtp"
keywords = ["smtp", "email", "mail", "server"]
categories = ["email"]
license = "AGPL-3.0-only OR LicenseRef-SEL"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "store"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "trc"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,6 +1,6 @@
[package]
name = "utils"
version = "0.10.2"
version = "0.10.3"
edition = "2021"
resolver = "2"

View file

@ -1,5 +1,5 @@
[version]
spam-filter = "1.1"
spam-filter = "1.2"
[sieve.trusted.scripts.spam-filter]
name = "Spam Filter"
@ -43,6 +43,15 @@ let "DOMAIN_DIRECTORY" "key_get('spam-config', 'directory')";
# Store to use for Bayes tokens and ids (leave empty for default)
let "SPAM_DB" "key_get('spam-config', 'lookup')";
# LLM model to use for spam classification
let "LLM_MODEL" "key_get('spam-config', 'llm-model')";
# LLM prompt to use for spam classification
let "LLM_PROMPT_TEXT" "key_get('spam-config', 'llm-prompt')";
# Whether to add an X-Spam-Llm-Result header
let "ADD_HEADER_LLM" "key_get('spam-config', 'add-llm-result')";
#### Script prelude.sieve ####
@ -2098,6 +2107,51 @@ if eval "!is_empty(pyzor_response) && pyzor_response[0] == 200" {
}
#### Script llm.sieve ####
# LLM-based classification: asks the configured model to categorize the
# message and converts the answer into LLM_<CATEGORY>_<CONFIDENCE> tags.
# Skipped entirely unless both a model and a prompt are configured.
if eval "LLM_MODEL && LLM_PROMPT_TEXT" {
# Send the configured prompt followed by the cleaned subject and the text
# body; the third llm_prompt argument (0.5) is the temperature passed to the
# model. The prompt requests a "Category,Confidence,Explanation" reply, which
# is split on commas and trimmed.
# NOTE(review): split_n(..., ',', 3) presumably caps the result at three
# parts so commas inside the explanation are preserved - confirm.
let "llm_result" "trim(split_n(llm_prompt(LLM_MODEL, LLM_PROMPT_TEXT + '\n\nSubject: ' + subject_clean + '\n\n' + text_body, 0.5), ',', 3))";
# llm_result[0] is the category and llm_result[1] the confidence. Within a
# recognized category, any confidence other than High or Medium (including a
# missing or malformed one) falls through to the LOW tag.
if eval "eq_ignore_case(llm_result[0], 'Unsolicited')" {
if eval "eq_ignore_case(llm_result[1], 'High')" {
let "t.LLM_UNSOLICITED_HIGH" "1";
} elsif eval "eq_ignore_case(llm_result[1], 'Medium')" {
let "t.LLM_UNSOLICITED_MEDIUM" "1";
} else {
let "t.LLM_UNSOLICITED_LOW" "1";
}
} elsif eval "eq_ignore_case(llm_result[0], 'Commercial')" {
if eval "eq_ignore_case(llm_result[1], 'High')" {
let "t.LLM_COMMERCIAL_HIGH" "1";
} elsif eval "eq_ignore_case(llm_result[1], 'Medium')" {
let "t.LLM_COMMERCIAL_MEDIUM" "1";
} else {
let "t.LLM_COMMERCIAL_LOW" "1";
}
} elsif eval "eq_ignore_case(llm_result[0], 'Harmful')" {
if eval "eq_ignore_case(llm_result[1], 'High')" {
let "t.LLM_HARMFUL_HIGH" "1";
} elsif eval "eq_ignore_case(llm_result[1], 'Medium')" {
let "t.LLM_HARMFUL_MEDIUM" "1";
} else {
let "t.LLM_HARMFUL_LOW" "1";
}
} elsif eval "eq_ignore_case(llm_result[0], 'Legitimate')" {
if eval "eq_ignore_case(llm_result[1], 'High')" {
let "t.LLM_LEGITIMATE_HIGH" "1";
} elsif eval "eq_ignore_case(llm_result[1], 'Medium')" {
let "t.LLM_LEGITIMATE_MEDIUM" "1";
} else {
let "t.LLM_LEGITIMATE_LOW" "1";
}
}
# An unrecognized category sets no tag at all.

# When enabled, record the model's verdict in an X-Spam-Llm-Result header.
# Only done when the reply contained all three fields.
if eval "ADD_HEADER_LLM && count(llm_result) > 2" {
eval "add_header('X-Spam-Llm-Result', 'Category=' + llm_result[0] + '; Confidence=' + llm_result[1] + '; Explanation=' + llm_result[2])";
}
}
#### Script composites.sieve ####
if eval "t.MISSING_ESSENTIAL_HEADERS && t.SINGLE_SHORT_PART" {
@ -2369,6 +2423,15 @@ let "DOMAIN_DIRECTORY" "key_get('spam-config', 'directory')";
# Store to use for Bayes tokens and ids (leave empty for default)
let "SPAM_DB" "key_get('spam-config', 'lookup')";
# LLM model to use for spam classification
let "LLM_MODEL" "key_get('spam-config', 'llm-model')";
# LLM prompt to use for spam classification
let "LLM_PROMPT_TEXT" "key_get('spam-config', 'llm-prompt')";
# Whether to add an X-Spam-Llm-Result header
let "ADD_HEADER_LLM" "key_get('spam-config', 'add-llm-result')";
#### Script replies_out.sieve ####
@ -2429,6 +2492,15 @@ let "DOMAIN_DIRECTORY" "key_get('spam-config', 'directory')";
# Store to use for Bayes tokens and ids (leave empty for default)
let "SPAM_DB" "key_get('spam-config', 'lookup')";
# LLM model to use for spam classification
let "LLM_MODEL" "key_get('spam-config', 'llm-model')";
# LLM prompt to use for spam classification
let "LLM_PROMPT_TEXT" "key_get('spam-config', 'llm-prompt')";
# Whether to add an X-Spam-Llm-Result header
let "ADD_HEADER_LLM" "key_get('spam-config', 'add-llm-result')";
#### Script greylist.sieve ####
@ -2486,6 +2558,15 @@ let "DOMAIN_DIRECTORY" "key_get('spam-config', 'directory')";
# Store to use for Bayes tokens and ids (leave empty for default)
let "SPAM_DB" "key_get('spam-config', 'lookup')";
# LLM model to use for spam classification
let "LLM_MODEL" "key_get('spam-config', 'llm-model')";
# LLM prompt to use for spam classification
let "LLM_PROMPT_TEXT" "key_get('spam-config', 'llm-prompt')";
# Whether to add an X-Spam-Llm-Result header
let "ADD_HEADER_LLM" "key_get('spam-config', 'add-llm-result')";
#### Script train.sieve ####
@ -2518,7 +2599,32 @@ spam-config = {
"threshold-discard" = "0.0",
"threshold-reject" = "0.0",
"directory" = "",
"lookup" = ""
"lookup" = "",
"llm-model" = "",
"llm-prompt" = "You are an AI assistant specialized in analyzing email content to detect unsolicited, commercial, or harmful messages. Your task is to examine the provided email, including its subject line, and determine if it falls into any of these categories. Please follow these steps:
- Carefully read the entire email content, including the subject line.
- Look for indicators of unsolicited messages, such as:
* Lack of prior relationship or consent
* Mass-mailing characteristics
* Vague or misleading sender information
- Identify commercial content by checking for:
* Promotional language
* Product or service offerings
* Call-to-action for purchases
- Detect potentially harmful content by searching for:
* Phishing attempts (requests for personal information, suspicious links)
* Malware indicators (suspicious attachments, urgent calls to action)
* Scams or fraudulent schemes
- Analyze the overall tone, intent, and legitimacy of the email.
- Determine the most appropriate single category for the email: Unsolicited, Commercial, Harmful, or Legitimate.
- Assess your confidence level in this determination: High, Medium, or Low.
- Provide a brief explanation for your determination.
- Format your response as follows, separated by commas: Category,Confidence,Explanation
* Example: Unsolicited,High,The email contains mass-mailing characteristics without any prior relationship context.
Here's the email to analyze, please provide your analysis based on the above instructions, ensuring your response is in the specified comma-separated format:",
"add-llm-result" = true
}
spam-scores = {"ABUSE_SURBL" = "5.0",
@ -2884,7 +2990,19 @@ spam-scores = {"ABUSE_SURBL" = "5.0",
"SHORT_PART_BAD_HEADERS" = "7.0",
"MISSING_ESSENTIAL_HEADERS" = "7.0",
"SINGLE_SHORT_PART" = "0.0",
"COMPLETELY_EMPTY" = "7.0"}
"COMPLETELY_EMPTY" = "7.0",
"LLM_UNSOLICITED_HIGH" = "3.0",
"LLM_UNSOLICITED_MEDIUM" = "2.0",
"LLM_UNSOLICITED_LOW" = "0.5",
"LLM_COMMERCIAL_HIGH" = "3.0",
"LLM_COMMERCIAL_MEDIUM" = "2.0",
"LLM_COMMERCIAL_LOW" = "0.5",
"LLM_HARMFUL_HIGH" = "3.0",
"LLM_HARMFUL_MEDIUM" = "2.0",
"LLM_HARMFUL_LOW" = "0.5",
"LLM_LEGITIMATE_HIGH" = "-3.0",
"LLM_LEGITIMATE_MEDIUM" = "-2.0",
"LLM_LEGITIMATE_LOW" = "-0.5"}
spam-dmarc = {"18f.gov",
"1password.com",

View file

@ -1,5 +1,5 @@
if eval "LLM_MODEL && LLM_PROMPT_TEXT" {
let "llm_result" "trim(split_n(llm_prompt(LLM_MODEL, LLM_PROMPT_TEXT + '\n\nSubject: ' + subject_clean + '\n\n' + text_body), ',', 3))";
let "llm_result" "trim(split_n(llm_prompt(LLM_MODEL, LLM_PROMPT_TEXT + '\n\nSubject: ' + subject_clean + '\n\n' + text_body, 0.5), ',', 3))";
if eval "eq_ignore_case(llm_result[0], 'Unsolicited')" {
if eval "eq_ignore_case(llm_result[1], 'High')" {

View file

@ -61,6 +61,6 @@ if not mailboxexists "My" {
error "'My' not found.";
}
if eval "llm_prompt('echo-test', 'hello world') != 'hello world'" {
if eval "llm_prompt('echo-test', 'hello world', 0.5) != 'hello world'" {
error "llm_prompt is unavailable.";
}