mirror of
https://github.com/stalwartlabs/mail-server.git
synced 2025-10-06 02:34:43 +08:00
Fixed MIXED_CHARSET spam filter check
This commit is contained in:
parent
cae7d43e6f
commit
043b53f3e9
7 changed files with 36 additions and 14 deletions
|
@ -51,7 +51,7 @@ pub fn register_functions_trusted() -> FunctionMap {
|
|||
.with_function("winnow", fn_winnow)
|
||||
.with_function("has_zwsp", fn_has_zwsp)
|
||||
.with_function("has_obscured", fn_has_obscured)
|
||||
.with_function("is_single_script", fn_is_single_script)
|
||||
.with_function("is_mixed_charset", fn_is_mixed_charset)
|
||||
.with_function("puny_decode", fn_puny_decode)
|
||||
.with_function("unicode_skeleton", fn_unicode_skeleton)
|
||||
.with_function("cure_text", fn_cure_text)
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
*/
|
||||
|
||||
use sieve::{runtime::Variable, Context};
|
||||
use unicode_security::MixedScript;
|
||||
|
||||
use crate::scripts::IsMixedCharset;
|
||||
|
||||
pub fn fn_is_ascii<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||
match &v[0] {
|
||||
|
@ -80,12 +81,12 @@ pub fn fn_unicode_skeleton<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable
|
|||
.into()
|
||||
}
|
||||
|
||||
pub fn fn_is_single_script<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||
pub fn fn_is_mixed_charset<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||
let text = v[0].to_string();
|
||||
if !text.is_empty() {
|
||||
text.as_ref().is_single_script()
|
||||
text.as_ref().is_mixed_charset()
|
||||
} else {
|
||||
true
|
||||
false
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ use std::sync::Arc;
|
|||
|
||||
use sieve::{runtime::Variable, Envelope};
|
||||
use store::Value;
|
||||
use unicode_security::mixed_script::AugmentedScriptSet;
|
||||
|
||||
use crate::IntoString;
|
||||
|
||||
|
@ -56,3 +57,21 @@ pub fn to_store_value(value: &Variable) -> Value<'static> {
|
|||
v => Value::Text(v.to_string().into_owned().into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IsMixedCharset {
|
||||
fn is_mixed_charset(&self) -> bool;
|
||||
}
|
||||
|
||||
impl<T: AsRef<str>> IsMixedCharset for T {
|
||||
fn is_mixed_charset(&self) -> bool {
|
||||
let mut set: Option<AugmentedScriptSet> = None;
|
||||
|
||||
for ch in self.as_ref().chars() {
|
||||
if !ch.is_ascii() {
|
||||
set.get_or_insert_default().intersect_with(ch.into());
|
||||
}
|
||||
}
|
||||
|
||||
set.map_or(false, |set| set.is_empty())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,12 +7,14 @@
|
|||
use std::{collections::HashSet, future::Future, vec};
|
||||
|
||||
use common::{
|
||||
scripts::functions::{array::cosine_similarity, unicode::CharUtils},
|
||||
scripts::{
|
||||
functions::{array::cosine_similarity, unicode::CharUtils},
|
||||
IsMixedCharset,
|
||||
},
|
||||
Server,
|
||||
};
|
||||
use mail_parser::{HeaderName, MimeHeaders, PartType};
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use unicode_security::MixedScript;
|
||||
|
||||
use crate::{SpamFilterContext, TextPart};
|
||||
|
||||
|
@ -304,8 +306,8 @@ impl SpamFilterAnalyzeMime for Server {
|
|||
|| ctx.input.message.html_body.contains(&part_id)
|
||||
})
|
||||
.map_or(false, |p| match p {
|
||||
TextPart::Plain { text_body, .. } => !text_body.is_single_script(),
|
||||
TextPart::Html { text_body, .. } => !text_body.is_single_script(),
|
||||
TextPart::Plain { text_body, .. } => text_body.is_mixed_charset(),
|
||||
TextPart::Html { text_body, .. } => text_body.is_mixed_charset(),
|
||||
TextPart::None => false,
|
||||
})
|
||||
{
|
||||
|
|
|
@ -10,11 +10,11 @@ use std::{borrow::Cow, future::Future, time::Duration};
|
|||
|
||||
use common::config::spamfilter::{Element, IpResolver, Location};
|
||||
use common::scripts::functions::unicode::CharUtils;
|
||||
use common::scripts::IsMixedCharset;
|
||||
use common::Server;
|
||||
use hyper::{header::LOCATION, Uri};
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use reqwest::redirect::Policy;
|
||||
use unicode_security::MixedScript;
|
||||
|
||||
use crate::modules::dnsbl::check_dnsbl;
|
||||
use crate::modules::expression::StringResolver;
|
||||
|
@ -244,7 +244,7 @@ impl SpamFilterAnalyzeUrl for Server {
|
|||
}
|
||||
}
|
||||
|
||||
if !host.fqdn.is_single_script() {
|
||||
if host.fqdn.is_mixed_charset() {
|
||||
ctx.result.add_tag("MIXED_CHARSET_URL");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1021,8 +1021,8 @@ dmarc.result pass
|
|||
dmarc.policy reject
|
||||
remote_ip 173.224.123.255
|
||||
tls.version TLS1_2
|
||||
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
|
||||
expect_header X-Spam-Status: Yes, score=9.10
|
||||
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), DATE_IN_PAST (1.00), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
|
||||
expect_header X-Spam-Status: Yes, score=10.10
|
||||
|
||||
Return-Path: <marketing@landeray.com>
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=default; d=landeray.com;
|
||||
|
|
|
@ -26,7 +26,7 @@ expect MIXED_CHARSET_URL
|
|||
|
||||
Subject: test
|
||||
|
||||
my site is https://www.xn--80ak6aa92e.com/
|
||||
my site is https://www.xn--1ca81o6aa92e.com/
|
||||
<!-- NEXT TEST -->
|
||||
expect UNPARSABLE_URL
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue