mirror of
https://github.com/stalwartlabs/mail-server.git
synced 2025-10-08 11:36:00 +08:00
Fixed MIXED_CHARSET spam filter check
This commit is contained in:
parent
cae7d43e6f
commit
043b53f3e9
7 changed files with 36 additions and 14 deletions
|
@ -51,7 +51,7 @@ pub fn register_functions_trusted() -> FunctionMap {
|
||||||
.with_function("winnow", fn_winnow)
|
.with_function("winnow", fn_winnow)
|
||||||
.with_function("has_zwsp", fn_has_zwsp)
|
.with_function("has_zwsp", fn_has_zwsp)
|
||||||
.with_function("has_obscured", fn_has_obscured)
|
.with_function("has_obscured", fn_has_obscured)
|
||||||
.with_function("is_single_script", fn_is_single_script)
|
.with_function("is_mixed_charset", fn_is_mixed_charset)
|
||||||
.with_function("puny_decode", fn_puny_decode)
|
.with_function("puny_decode", fn_puny_decode)
|
||||||
.with_function("unicode_skeleton", fn_unicode_skeleton)
|
.with_function("unicode_skeleton", fn_unicode_skeleton)
|
||||||
.with_function("cure_text", fn_cure_text)
|
.with_function("cure_text", fn_cure_text)
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use sieve::{runtime::Variable, Context};
|
use sieve::{runtime::Variable, Context};
|
||||||
use unicode_security::MixedScript;
|
|
||||||
|
use crate::scripts::IsMixedCharset;
|
||||||
|
|
||||||
pub fn fn_is_ascii<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
pub fn fn_is_ascii<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||||
match &v[0] {
|
match &v[0] {
|
||||||
|
@ -80,12 +81,12 @@ pub fn fn_unicode_skeleton<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fn_is_single_script<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
pub fn fn_is_mixed_charset<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||||
let text = v[0].to_string();
|
let text = v[0].to_string();
|
||||||
if !text.is_empty() {
|
if !text.is_empty() {
|
||||||
text.as_ref().is_single_script()
|
text.as_ref().is_mixed_charset()
|
||||||
} else {
|
} else {
|
||||||
true
|
false
|
||||||
}
|
}
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@ use std::sync::Arc;
|
||||||
|
|
||||||
use sieve::{runtime::Variable, Envelope};
|
use sieve::{runtime::Variable, Envelope};
|
||||||
use store::Value;
|
use store::Value;
|
||||||
|
use unicode_security::mixed_script::AugmentedScriptSet;
|
||||||
|
|
||||||
use crate::IntoString;
|
use crate::IntoString;
|
||||||
|
|
||||||
|
@ -56,3 +57,21 @@ pub fn to_store_value(value: &Variable) -> Value<'static> {
|
||||||
v => Value::Text(v.to_string().into_owned().into()),
|
v => Value::Text(v.to_string().into_owned().into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait IsMixedCharset {
|
||||||
|
fn is_mixed_charset(&self) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: AsRef<str>> IsMixedCharset for T {
|
||||||
|
fn is_mixed_charset(&self) -> bool {
|
||||||
|
let mut set: Option<AugmentedScriptSet> = None;
|
||||||
|
|
||||||
|
for ch in self.as_ref().chars() {
|
||||||
|
if !ch.is_ascii() {
|
||||||
|
set.get_or_insert_default().intersect_with(ch.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
set.map_or(false, |set| set.is_empty())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -7,12 +7,14 @@
|
||||||
use std::{collections::HashSet, future::Future, vec};
|
use std::{collections::HashSet, future::Future, vec};
|
||||||
|
|
||||||
use common::{
|
use common::{
|
||||||
scripts::functions::{array::cosine_similarity, unicode::CharUtils},
|
scripts::{
|
||||||
|
functions::{array::cosine_similarity, unicode::CharUtils},
|
||||||
|
IsMixedCharset,
|
||||||
|
},
|
||||||
Server,
|
Server,
|
||||||
};
|
};
|
||||||
use mail_parser::{HeaderName, MimeHeaders, PartType};
|
use mail_parser::{HeaderName, MimeHeaders, PartType};
|
||||||
use nlp::tokenizers::types::TokenType;
|
use nlp::tokenizers::types::TokenType;
|
||||||
use unicode_security::MixedScript;
|
|
||||||
|
|
||||||
use crate::{SpamFilterContext, TextPart};
|
use crate::{SpamFilterContext, TextPart};
|
||||||
|
|
||||||
|
@ -304,8 +306,8 @@ impl SpamFilterAnalyzeMime for Server {
|
||||||
|| ctx.input.message.html_body.contains(&part_id)
|
|| ctx.input.message.html_body.contains(&part_id)
|
||||||
})
|
})
|
||||||
.map_or(false, |p| match p {
|
.map_or(false, |p| match p {
|
||||||
TextPart::Plain { text_body, .. } => !text_body.is_single_script(),
|
TextPart::Plain { text_body, .. } => text_body.is_mixed_charset(),
|
||||||
TextPart::Html { text_body, .. } => !text_body.is_single_script(),
|
TextPart::Html { text_body, .. } => text_body.is_mixed_charset(),
|
||||||
TextPart::None => false,
|
TextPart::None => false,
|
||||||
})
|
})
|
||||||
{
|
{
|
||||||
|
|
|
@ -10,11 +10,11 @@ use std::{borrow::Cow, future::Future, time::Duration};
|
||||||
|
|
||||||
use common::config::spamfilter::{Element, IpResolver, Location};
|
use common::config::spamfilter::{Element, IpResolver, Location};
|
||||||
use common::scripts::functions::unicode::CharUtils;
|
use common::scripts::functions::unicode::CharUtils;
|
||||||
|
use common::scripts::IsMixedCharset;
|
||||||
use common::Server;
|
use common::Server;
|
||||||
use hyper::{header::LOCATION, Uri};
|
use hyper::{header::LOCATION, Uri};
|
||||||
use nlp::tokenizers::types::TokenType;
|
use nlp::tokenizers::types::TokenType;
|
||||||
use reqwest::redirect::Policy;
|
use reqwest::redirect::Policy;
|
||||||
use unicode_security::MixedScript;
|
|
||||||
|
|
||||||
use crate::modules::dnsbl::check_dnsbl;
|
use crate::modules::dnsbl::check_dnsbl;
|
||||||
use crate::modules::expression::StringResolver;
|
use crate::modules::expression::StringResolver;
|
||||||
|
@ -244,7 +244,7 @@ impl SpamFilterAnalyzeUrl for Server {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !host.fqdn.is_single_script() {
|
if host.fqdn.is_mixed_charset() {
|
||||||
ctx.result.add_tag("MIXED_CHARSET_URL");
|
ctx.result.add_tag("MIXED_CHARSET_URL");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1021,8 +1021,8 @@ dmarc.result pass
|
||||||
dmarc.policy reject
|
dmarc.policy reject
|
||||||
remote_ip 173.224.123.255
|
remote_ip 173.224.123.255
|
||||||
tls.version TLS1_2
|
tls.version TLS1_2
|
||||||
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
|
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), DATE_IN_PAST (1.00), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
|
||||||
expect_header X-Spam-Status: Yes, score=9.10
|
expect_header X-Spam-Status: Yes, score=10.10
|
||||||
|
|
||||||
Return-Path: <marketing@landeray.com>
|
Return-Path: <marketing@landeray.com>
|
||||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=default; d=landeray.com;
|
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=default; d=landeray.com;
|
||||||
|
|
|
@ -26,7 +26,7 @@ expect MIXED_CHARSET_URL
|
||||||
|
|
||||||
Subject: test
|
Subject: test
|
||||||
|
|
||||||
my site is https://www.xn--80ak6aa92e.com/
|
my site is https://www.xn--1ca81o6aa92e.com/
|
||||||
<!-- NEXT TEST -->
|
<!-- NEXT TEST -->
|
||||||
expect UNPARSABLE_URL
|
expect UNPARSABLE_URL
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue