IP addresses trigger R_SUSPICIOUS_URL false positive (closes #461 #419)

This commit is contained in:
mdecimus 2024-05-22 16:52:26 +02:00
parent 0fc5d40ec1
commit 0d2e58361b
13 changed files with 102 additions and 45 deletions

37
Cargo.lock generated
View file

@ -1517,6 +1517,37 @@ dependencies = [
"syn 2.0.63",
]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling 0.20.8",
"proc-macro2",
"quote",
"syn 2.0.63",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core",
"syn 2.0.63",
]
[[package]]
name = "des"
version = "0.8.1"
@ -2941,13 +2972,13 @@ dependencies = [
[[package]]
name = "jieba-rs"
version = "0.6.8"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f0c1347cd3ac8d7c6e3a2dc33ac496d365cf09fc0831aa61111e1a6738983e"
checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25"
dependencies = [
"cedarwood",
"derive_builder",
"fxhash",
"hashbrown 0.14.5",
"lazy_static",
"phf",
"phf_codegen",

View file

@ -5,7 +5,7 @@
</p>
<h3 align="center">
Secure & Modern All-in-One Mail Server (IMAP, JMAP, SMTP) 🛡️
Secure & Modern All-in-One Mail Server (IMAP, JMAP, POP3, SMTP) 🛡️
</h3>
<br>
@ -44,27 +44,25 @@
## Features
**Stalwart Mail Server** is an open-source mail server solution with JMAP, IMAP4, and SMTP support and a wide range of modern features. It is written in Rust and designed to be secure, fast, robust and scalable.
**Stalwart Mail Server** is an open-source mail server solution with JMAP, IMAP4, POP3, and SMTP support and a wide range of modern features. It is written in Rust and designed to be secure, fast, robust and scalable.
Key features:
- **JMAP** server:
- JMAP Core ([RFC 8620](https://datatracker.ietf.org/doc/html/rfc8620))
- JMAP Mail ([RFC 8621](https://datatracker.ietf.org/doc/html/rfc8621))
- JMAP for Sieve Scripts ([DRAFT-SIEVE-22](https://www.ietf.org/archive/id/draft-ietf-jmap-sieve-22.html))
- JMAP over WebSocket ([RFC 8887](https://datatracker.ietf.org/doc/html/rfc8887)), JMAP Blob Management ([RFC9404](https://www.rfc-editor.org/rfc/rfc9404.html)) and JMAP for Quotas ([RFC9425](https://www.rfc-editor.org/rfc/rfc9425.html)) extensions.
- **IMAP4** server:
- IMAP4rev2 ([RFC 9051](https://datatracker.ietf.org/doc/html/rfc9051)) full compliance.
- IMAP4rev1 ([RFC 3501](https://datatracker.ietf.org/doc/html/rfc3501)) backwards compatible.
- ManageSieve ([RFC 5804](https://datatracker.ietf.org/doc/html/rfc5804)) server.
- Numerous [extensions](https://stalw.art/docs/development/rfcs#imap4-and-extensions) supported.
- [JMAP Core](https://datatracker.ietf.org/doc/html/rfc8620) and [JMAP Mail](https://datatracker.ietf.org/doc/html/rfc8621) full compliance.
- [JMAP for Sieve Scripts](https://www.ietf.org/archive/id/draft-ietf-jmap-sieve-22.html) extension for managing Sieve scripts.
- [JMAP for WebSocket](https://datatracker.ietf.org/doc/html/rfc8887), [JMAP Blob Management](https://www.rfc-editor.org/rfc/rfc9404.html) and [JMAP for Quotas](https://www.rfc-editor.org/rfc/rfc9425.html) extensions.
- **IMAP4**, **POP3** and **ManageSieve** server:
- [IMAP4rev2](https://datatracker.ietf.org/doc/html/rfc9051) and [IMAP4rev1](https://datatracker.ietf.org/doc/html/rfc3501) server with support for [numerous extensions](https://stalw.art/docs/development/rfcs#imap4-and-extensions).
- [POP3](https://datatracker.ietf.org/doc/html/rfc1939) server with [extensions](https://datatracker.ietf.org/doc/html/rfc2449), [STLS](https://datatracker.ietf.org/doc/html/rfc2595) and [SASL](https://datatracker.ietf.org/doc/html/rfc5034) support.
- [ManageSieve](https://datatracker.ietf.org/doc/html/rfc5804) server for managing Sieve scripts.
- **SMTP** server:
- Built-in [DMARC](https://datatracker.ietf.org/doc/html/rfc7489), [DKIM](https://datatracker.ietf.org/doc/html/rfc6376), [SPF](https://datatracker.ietf.org/doc/html/rfc7208) and [ARC](https://datatracker.ietf.org/doc/html/rfc8617) support for message authentication.
- Strong transport security through [DANE](https://datatracker.ietf.org/doc/html/rfc6698), [MTA-STS](https://datatracker.ietf.org/doc/html/rfc8461) and [SMTP TLS](https://datatracker.ietf.org/doc/html/rfc8460) reporting.
- Inbound throttling and filtering with granular configuration rules, sieve scripting and milter integration.
- Distributed virtual queues with delayed delivery, priority delivery, quotas, routing rules and throttling support.
- Envelope rewriting and message modification.
- **Spam and Phishing** filter:
- **Spam and Phishing** filter:
- Comprehensive set of filtering **rules** on par with popular solutions.
- Statistical **spam classifier** with automatic training capabilities.
- DNS Blocklists (**DNSBLs**) checking of IP addresses, domains, and hashes.

View file

@ -313,7 +313,7 @@ impl JmapConfig {
.unwrap_or_else(|| SimpleCron::parse_value("15 * *").unwrap()),
account_purge_frequency: config
.property_or_default::<SimpleCron>("jmap.account.purge.frequency", "0 0 *")
.unwrap_or_else(|| SimpleCron::parse_value("15 * *").unwrap()),
.unwrap_or_else(|| SimpleCron::parse_value("0 0 *").unwrap()),
fallback_admin: config
.value("authentication.fallback-admin.user")
.and_then(|u| {

View file

@ -502,6 +502,7 @@ impl Patterns {
Pattern::Include(MatchType::StartsWith("directory.".to_string())),
Pattern::Include(MatchType::StartsWith("tracer.".to_string())),
Pattern::Exclude(MatchType::StartsWith("server.blocked-ip.".to_string())),
Pattern::Exclude(MatchType::StartsWith("server.allowed-ip.".to_string())),
Pattern::Include(MatchType::StartsWith("server.".to_string())),
Pattern::Include(MatchType::StartsWith("certificate.".to_string())),
Pattern::Include(MatchType::StartsWith(

View file

@ -296,6 +296,7 @@ where
| TokenType::Url(_)
| TokenType::UrlNoScheme(_)
| TokenType::UrlNoHost(_)
| TokenType::IpAddr(_)
| TokenType::Email(_) => {
if token_start != usize::MAX {
add_line(&mut clean_line, &line[token_start..token_end]);

View file

@ -17,7 +17,7 @@ lazy_static = "1.4"
whatlang = "0.16" # Language detection
rust-stemmers = "1.2" # Stemmers
tinysegmenter = "0.1" # Japanese tokenizer
jieba-rs = "0.6" # Chinese stemmer
jieba-rs = "0.7" # Chinese stemmer
phf = { version = "0.11", features = ["macros"] }
lru-cache = "0.1.2"
parking_lot = "0.12.1"

View file

@ -118,6 +118,7 @@ impl<'x, 'y> Iterator for BayesTokenizer<'x, 'y> {
continue;
}
}
TokenType::IpAddr(word) => word.into(),
TokenType::UrlNoScheme(word) => word
.split_once('/')
.map_or(word, |(h, _)| h)

View file

@ -55,6 +55,7 @@ pub enum TokenType<T> {
Url(T),
UrlNoScheme(T),
UrlNoHost(T),
IpAddr(T),
Email(T),
Float(T),
}
@ -328,7 +329,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
// Try parsing hostname
let mut is_valid_host = true;
let (host_start_pos, mut end_pos) = if has_scheme {
let (host_start_pos, mut end_pos, is_ip) = if has_scheme {
let mut start_pos = usize::MAX;
let mut end_pos = usize::MAX;
let mut restore_pos = self.peek_pos;
@ -387,12 +388,11 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
}
self.peek_pos = restore_pos;
let is_ip = is_ipv6 || (int_count == 4 && dot_count == 3 && text_count == 0);
if end_pos != usize::MAX {
is_valid_host =
(last_label_is_tld && dot_count >= 1 && (text_count + int_count) >= 2)
|| (int_count == 4 && dot_count == 3)
|| is_ipv6;
(start_pos, end_pos)
(last_label_is_tld && dot_count >= 1 && (text_count + int_count) >= 2) || is_ip;
(start_pos, end_pos, is_ip)
} else {
return None;
}
@ -487,15 +487,18 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
self.peek_pos = restore_pos;
}
let word = &self.text[start_pos..end_pos];
Token {
word: if has_scheme {
if is_valid_host {
TokenType::Url(&self.text[start_pos..end_pos])
TokenType::Url(word)
} else {
TokenType::UrlNoHost(&self.text[start_pos..end_pos])
TokenType::UrlNoHost(word)
}
} else if is_ip && !found_query_start {
TokenType::IpAddr(word)
} else {
TokenType::UrlNoScheme(&self.text[start_pos..end_pos])
TokenType::UrlNoScheme(word)
},
from: start_pos,
to: end_pos,
@ -528,7 +531,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
}
// Obtain domain part
let (_, end_pos) = self.try_parse_hostname()?;
let (_, end_pos, _) = self.try_parse_hostname()?;
Token {
word: TokenType::Email(&self.text[start_token.from..end_pos]),
@ -538,7 +541,7 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
.into()
}
fn try_parse_hostname(&mut self) -> Option<(usize, usize)> {
fn try_parse_hostname(&mut self) -> Option<(usize, usize, bool)> {
let mut last_ch = u8::MAX;
let mut has_int = false;
let mut has_alpha = false;
@ -561,7 +564,9 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
continue;
}
TokenType::Punctuation('[') if start_pos == usize::MAX => {
return self.try_parse_ipv6(token.from);
return self
.try_parse_ipv6(token.from)
.map(|(from, to)| (from, to, true));
}
TokenType::Alphabetic(text) | TokenType::Alphanumeric(text) if text.len() <= 63 => {
last_label_is_tld =
@ -598,11 +603,9 @@ impl<'x, 'y> TypesTokenizer<'x, 'y> {
dot_count -= 1;
}
if end_pos != usize::MAX
&& dot_count >= 1
&& (last_label_is_tld || (has_int && !has_alpha && dot_count == 3))
{
(start_pos, end_pos).into()
let is_ipv4 = has_int && !has_alpha && dot_count == 3;
if end_pos != usize::MAX && dot_count >= 1 && (last_label_is_tld || is_ipv4) {
(start_pos, end_pos, is_ipv4).into()
} else {
None
}
@ -2352,7 +2355,7 @@ mod test {
"https://127.0.0.1/",
vec![TokenType::Url("https://127.0.0.1/")],
),
("1.0.0.0", vec![TokenType::UrlNoScheme("1.0.0.0")]),
("1.0.0.0", vec![TokenType::IpAddr("1.0.0.0")]),
(
"1.0.0.0/foo/bar",
vec![TokenType::UrlNoScheme("1.0.0.0/foo/bar")],
@ -2373,7 +2376,7 @@ mod test {
vec![
TokenType::Integer("1"),
TokenType::Punctuation('.'),
TokenType::UrlNoScheme("0.0.0.0"),
TokenType::IpAddr("0.0.0.0"),
],
),
(

View file

@ -1226,12 +1226,12 @@ foreverypart {
if eval "!has_plain_part && ma_ct == 'text/plain'" {
let "text_part" "part.text";
let "text_part_words" "tokenize(text_part, 'words')";
let "text_part_uris" "count(tokenize(text_part, 'uri_strict'))";
let "text_part_uris" "count(dedup(uri_part(tokenize(text_part, 'uri_strict'), 'host')))";
let "has_plain_part" "1";
} elsif eval "!has_html_part && ma_ct == 'text/html'" {
let "html_part" "html_to_text(part.text)";
let "html_part_words" "tokenize(html_part, 'words')";
let "html_part_uris" "count(tokenize(html_part, 'uri_strict'))";
let "html_part_uris" "count(dedup(uri_part(tokenize(part.text, 'uri_strict'), 'host')))";
let "has_html_part" "1";
}
}
@ -2683,7 +2683,7 @@ spam-scores = {"ABUSE_SURBL" = "5.0",
"RBL_VIRUSFREE_BOTNET" = "2.0",
"RCPT_ADDR_IN_SUBJECT" = "3.0",
"RCPT_COUNT_FIVE" = "0.0",
"RCPT_COUNT_GT_50" = "0.0",
"RCPT_COUNT_GT_50" = "1.0",
"RCPT_COUNT_ONE" = "0.0",
"RCPT_COUNT_SEVEN" = "0.0",
"RCPT_COUNT_THREE" = "0.0",
@ -2810,7 +2810,7 @@ spam-scores = {"ABUSE_SURBL" = "5.0",
"URIBL_BLOCKED" = "0.0",
"URIBL_GREY" = "1.5",
"URIBL_RED" = "3.5",
"URI_COUNT_ODD" = "1.0",
"URI_COUNT_ODD" = "0.5",
"URI_HIDDEN_PATH" = "1.0",
"URL_IN_SUBJECT" = "4.0",
"URL_REDIRECTOR_NESTED" = "1.0",

View file

@ -220,7 +220,7 @@ spam-scores = {"ABUSE_SURBL" = "5.0",
"RBL_VIRUSFREE_BOTNET" = "2.0",
"RCPT_ADDR_IN_SUBJECT" = "3.0",
"RCPT_COUNT_FIVE" = "0.0",
"RCPT_COUNT_GT_50" = "0.0",
"RCPT_COUNT_GT_50" = "1.0",
"RCPT_COUNT_ONE" = "0.0",
"RCPT_COUNT_SEVEN" = "0.0",
"RCPT_COUNT_THREE" = "0.0",
@ -347,7 +347,7 @@ spam-scores = {"ABUSE_SURBL" = "5.0",
"URIBL_BLOCKED" = "0.0",
"URIBL_GREY" = "1.5",
"URIBL_RED" = "3.5",
"URI_COUNT_ODD" = "1.0",
"URI_COUNT_ODD" = "0.5",
"URI_HIDDEN_PATH" = "1.0",
"URL_IN_SUBJECT" = "4.0",
"URL_REDIRECTOR_NESTED" = "1.0",

View file

@ -50,12 +50,12 @@ foreverypart {
if eval "!has_plain_part && ma_ct == 'text/plain'" {
let "text_part" "part.text";
let "text_part_words" "tokenize(text_part, 'words')";
let "text_part_uris" "count(tokenize(text_part, 'uri_strict'))";
let "text_part_uris" "count(dedup(uri_part(tokenize(text_part, 'uri_strict'), 'host')))";
let "has_plain_part" "1";
} elsif eval "!has_html_part && ma_ct == 'text/html'" {
let "html_part" "html_to_text(part.text)";
let "html_part_words" "tokenize(html_part, 'words')";
let "html_part_uris" "count(tokenize(html_part, 'uri_strict'))";
let "html_part_uris" "count(dedup(uri_part(tokenize(part.text, 'uri_strict'), 'host')))";
let "has_html_part" "1";
}
}

View file

@ -50,7 +50,7 @@ dkim.domains tenthrevolution.com
dmarc.result pass
remote_ip 185.58.86.181
tls.version TLSv1.3
expect_header X-Spam-Status No, score=4.
expect_header X-Spam-Status No, score=3.
expect_header X-Spam-Result
expect from_eq_envfrom from_has_dn helo_nores_a_or_mx forged_rcvd_trail date_in_past arc_na uri_count_odd dkim_signed has_attachment spf_allow rcvd_tls_last rcpt_count_one mime_good subject_ends_spaces fromhost_nores_a_or_mx to_dn_eq_addr_all dkim_allow dmarc_policy_allow rcvd_count_three to_match_envrcpt_all
@ -627,7 +627,7 @@ remote_ip 51.89.165.39
tls.version TLS1_2
expect_header X-Spam-Status Yes, score=13.
expect_header X-Spam-Result
expect has_replyto violated_direct_spf replyto_addr_eq_from once_received r_parts_differ mid_rhs_match_from fromhost_nores_a_or_mx from_has_dn dkim_allow date_in_past to_match_envrcpt_all html_short_link_img_1 rcpt_count_one arc_na helo_nores_a_or_mx spf_softfail rcvd_tls_last rcvd_count_zero replyto_dom_eq_from_dom to_dn_none has_list_unsub dkim_signed rdns_none from_eq_envfrom dmarc_policy_reject
expect has_replyto violated_direct_spf replyto_addr_eq_from uri_count_odd once_received r_parts_differ mid_rhs_match_from fromhost_nores_a_or_mx from_has_dn dkim_allow date_in_past to_match_envrcpt_all html_short_link_img_1 rcpt_count_one arc_na helo_nores_a_or_mx spf_softfail rcvd_tls_last rcvd_count_zero replyto_dom_eq_from_dom to_dn_none has_list_unsub dkim_signed rdns_none from_eq_envfrom dmarc_policy_reject
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=sectionalism; d=grupokonecta.net;
h=To:Subject:Message-ID:Date:From:Reply-To:MIME-Version:List-Unsubscribe:

View file

@ -82,3 +82,25 @@ Subject: plain test
https://phishing-open.org
https://phishing-tank.com
<!-- NEXT TEST -->
expect
Subject: IPs are not urls
192.168.1.1
<!-- NEXT TEST -->
expect
Content-Type: text/html; charset="utf-8"
Subject: IPs in HTML are not urls
<html>
Das System wurde um 01.01.1970 08:28:00 für die IP-Adresse
123.123.123.123 gesperrt.<br>
<br>
Der Besucher hat versucht, sich mit folgenden Daten anzumelden.<br>
Partner: 12345678<br>
Portal: <a href="https://www.localhost.de/example.php" target="_blank">IP-Sperre einsehen</a>
</html>