mirror of
https://github.com/stalwartlabs/mail-server.git
synced 2025-09-08 21:14:13 +08:00
Port Spam filter to Rust - part 5
This commit is contained in:
parent
44ae796d9b
commit
b5696c2d26
23 changed files with 1017 additions and 459 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -6450,6 +6450,7 @@ dependencies = [
|
|||
"decancer",
|
||||
"hyper 1.5.1",
|
||||
"idna 1.0.3",
|
||||
"infer 0.16.0",
|
||||
"mail-auth",
|
||||
"mail-builder",
|
||||
"mail-parser",
|
||||
|
@ -6457,6 +6458,8 @@ dependencies = [
|
|||
"nlp",
|
||||
"psl",
|
||||
"reqwest 0.12.9",
|
||||
"sha1",
|
||||
"sha2 0.10.8",
|
||||
"smtp-proto",
|
||||
"store",
|
||||
"tokio",
|
||||
|
|
|
@ -4,11 +4,15 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
use std::time::Duration;
|
||||
use std::{net::SocketAddr, time::Duration};
|
||||
|
||||
use ahash::AHashSet;
|
||||
use hyper::HeaderMap;
|
||||
use mail_parser::HeaderName;
|
||||
use utils::{config::Config, glob::GlobSet};
|
||||
use utils::{
|
||||
config::Config,
|
||||
glob::{GlobMap, GlobSet},
|
||||
};
|
||||
|
||||
use super::if_block::IfBlock;
|
||||
|
||||
|
@ -19,16 +23,60 @@ pub struct SpamFilterConfig {
|
|||
pub max_rbl_email_checks: usize,
|
||||
pub max_rbl_url_checks: usize,
|
||||
|
||||
pub greylist_duration: Option<Duration>,
|
||||
|
||||
pub pyzor: Option<PyzorConfig>,
|
||||
pub asn: AsnLookupProvider,
|
||||
|
||||
pub list_dmarc_allow: GlobSet,
|
||||
pub list_spf_dkim_allow: GlobSet,
|
||||
pub list_freemail_providers: GlobSet,
|
||||
pub list_disposable_providers: GlobSet,
|
||||
pub list_trusted_domains: GlobSet,
|
||||
pub list_url_redirectors: GlobSet,
|
||||
pub list_file_extensions: GlobMap<FileExtension>,
|
||||
|
||||
pub remote_lists: Vec<RemoteListConfig>,
|
||||
pub dnsbls: Vec<DnsblConfig>,
|
||||
}
|
||||
|
||||
/// Configured source for ASN lookups; defaults to [`AsnLookupProvider::None`].
// NOTE(review): field meanings below are inferred from their names and types
// only; the code that consumes this enum is not visible here — confirm.
#[derive(Debug, Clone, Default)]
|
||||
pub enum AsnLookupProvider {
|
||||
/// Lookup configured through DNS zones.
Dns {
|
||||
// presumably the zone queried for IPv4 addresses — confirm
ipv4_zone: String,
|
||||
// presumably the zone queried for IPv6 addresses — confirm
ipv6_zone: String,
|
||||
// presumably the character that splits the response into fields — confirm
separator: char,
|
||||
// presumably the position of the ASN within the split response — confirm
asn_index: usize,
|
||||
// optional; presumably the position of the country field — confirm
country_index: Option<usize>,
|
||||
},
|
||||
/// Lookup configured through an HTTP API.
Rest {
|
||||
// presumably the endpoint URL or URL template — confirm
api: String,
|
||||
timeout: Duration,
|
||||
headers: HeaderMap,
|
||||
// presumably the path to the ASN inside the response body — confirm
asn_path: Vec<String>,
|
||||
// optional; presumably the path to the country inside the response — confirm
country_path: Option<Vec<String>>,
|
||||
},
|
||||
/// No provider configured (the `Default` value).
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
/// Settings for the Pyzor check; the analyzer only runs when this is configured.
#[derive(Debug, Clone)]
|
||||
pub struct PyzorConfig {
|
||||
// presumably the address of the Pyzor server — confirm against `pyzor_check`
pub address: SocketAddr,
|
||||
// presumably the time limit for a single Pyzor query — confirm
pub timeout: Duration,
|
||||
/// The reported `count` must be strictly greater than this for a hit.
pub min_count: u64,
|
||||
/// A whitelist count below this value lets the hit stand.
pub min_wl_count: u64,
|
||||
/// A `wl_count / count` ratio below this value also lets the hit stand.
pub ratio: f64,
|
||||
}
|
||||
|
||||
/// Per-extension metadata used when analyzing attachment file names.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct FileExtension {
|
||||
/// Content types accepted for this extension, compared against the
/// lowercased `type/subtype` of the part; an empty set disables the check.
pub known_types: AHashSet<String>,
|
||||
/// Extension is considered dangerous (drives the bad-extension tags).
pub is_bad: bool,
|
||||
/// Extension denotes an archive (drives the archive-in-archive tag).
pub is_archive: bool,
|
||||
// NOTE(review): no use of `is_nz` is visible in this view — confirm its meaning.
pub is_nz: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum Element {
|
||||
Url,
|
||||
|
|
|
@ -82,6 +82,32 @@ pub fn fn_cosine_similarity<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variabl
|
|||
.into()
|
||||
}
|
||||
|
||||
/// Computes the cosine similarity between two bags of words.
///
/// Each slice is treated as a multiset: the number of times a word occurs is
/// its weight in the corresponding term-frequency vector.
///
/// Returns `dot(a, b) / (|a| * |b|)`, which is `1.0` (up to rounding) for
/// identical bags and `0.0` for bags that share no word. When either slice is
/// empty the result is `0.0` rather than a division by zero.
pub fn cosine_similarity(a: &[&str], b: &[&str]) -> f64 {
    // Occurrences of each word: slot 0 counts `a`, slot 1 counts `b`.
    let mut word_freq: HashMap<&str, [u64; 2]> = HashMap::new();

    for (idx, items) in [a, b].into_iter().enumerate() {
        for &item in items {
            word_freq.entry(item).or_insert([0, 0])[idx] += 1;
        }
    }

    // Accumulate in f64: squaring a count in a narrow integer type overflows
    // as soon as one word is repeated more than 65_535 times.
    let mut dot_product = 0.0_f64;
    let mut magnitude_a = 0.0_f64;
    let mut magnitude_b = 0.0_f64;

    for count in word_freq.values() {
        let (in_a, in_b) = (count[0] as f64, count[1] as f64);
        dot_product += in_a * in_b;
        magnitude_a += in_a * in_a;
        magnitude_b += in_b * in_b;
    }

    if magnitude_a > 0.0 && magnitude_b > 0.0 {
        dot_product / magnitude_a.sqrt() / magnitude_b.sqrt()
    } else {
        0.0
    }
}
|
||||
|
||||
pub fn fn_jaccard_similarity<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
|
||||
let mut word_freq = [HashSet::new(), HashSet::new()];
|
||||
|
||||
|
|
|
@ -291,6 +291,70 @@ impl<T: SessionStream> Session<T> {
|
|||
}
|
||||
|
||||
if self.is_allowed().await {
|
||||
// Greylist
|
||||
if let Some(greylist_duration) = self
|
||||
.server
|
||||
.core
|
||||
.spam
|
||||
.greylist_duration
|
||||
.filter(|_| self.data.authenticated_as.is_none())
|
||||
{
|
||||
let key = format!(
|
||||
"g:{}:{}:{}",
|
||||
self.data.remote_ip_str,
|
||||
self.data.mail_from.as_ref().unwrap().address_lcase,
|
||||
self.data.rcpt_to.last().unwrap().address_lcase
|
||||
);
|
||||
match self
|
||||
.server
|
||||
.lookup_store()
|
||||
.key_exists(key.clone().into_bytes())
|
||||
.await
|
||||
{
|
||||
Ok(true) => (),
|
||||
Ok(false) => {
|
||||
match self
|
||||
.server
|
||||
.lookup_store()
|
||||
.key_set(key.into_bytes(), vec![], greylist_duration.as_secs().into())
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
let rcpt = self.data.rcpt_to.pop().unwrap();
|
||||
|
||||
trc::event!(
|
||||
Smtp(SmtpEvent::RcptToGreylisted),
|
||||
SpanId = self.data.session_id,
|
||||
To = rcpt.address_lcase,
|
||||
);
|
||||
|
||||
return self
|
||||
.write(
|
||||
concat!(
|
||||
"422 4.2.2 Greylisted, please try ",
|
||||
"again in a few moments.\r\n"
|
||||
)
|
||||
.as_bytes(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
Err(err) => {
|
||||
trc::error!(err
|
||||
.span_id(self.data.session_id)
|
||||
.caused_by(trc::location!())
|
||||
.details("Failed to set greylist."));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
trc::error!(err
|
||||
.span_id(self.data.session_id)
|
||||
.caused_by(trc::location!())
|
||||
.details("Failed to check greylist."));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trc::event!(
|
||||
Smtp(SmtpEvent::RcptTo),
|
||||
SpanId = self.data.session_id,
|
||||
|
|
|
@ -15,12 +15,16 @@ mail-parser = { version = "0.9", features = ["full_encoding", "ludicrous_mode"]
|
|||
mail-builder = { version = "0.3", features = ["ludicrous_mode"] }
|
||||
mail-auth = { version = "0.5" }
|
||||
mail-send = { version = "0.4", default-features = false, features = ["cram-md5", "ring", "tls12"] }
|
||||
tokio = { version = "1.23", features = ["net", "macros"] }
|
||||
psl = "2"
|
||||
hyper = { version = "1.0.1", features = ["server", "http1", "http2"] }
|
||||
idna = "1.0"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "http2", "stream"]}
|
||||
decancer = "3.0.1"
|
||||
unicode-security = "0.1.0"
|
||||
infer = "0.16"
|
||||
sha1 = "0.10"
|
||||
sha2 = "0.10.6"
|
||||
|
||||
[features]
|
||||
test_mode = []
|
||||
|
|
323
crates/spam-filter/src/analysis/html.rs
Normal file
323
crates/spam-filter/src/analysis/html.rs
Normal file
|
@ -0,0 +1,323 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Server;
|
||||
use hyper::Uri;
|
||||
use mail_parser::MimeHeaders;
|
||||
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
|
||||
|
||||
use crate::{modules::html::*, Hostname, SpamFilterContext, TextPart};
|
||||
|
||||
pub trait SpamFilterAnalyzeHtml: Sync + Send {
|
||||
fn spam_filter_analyze_html(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
struct Href {
|
||||
url_parsed: Option<Uri>,
|
||||
host: Option<Hostname>,
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeHtml for Server {
|
||||
async fn spam_filter_analyze_html(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
// Message only has text/html MIME parts
|
||||
if ctx.input.message.content_type().map_or(false, |ct| {
|
||||
ct.ctype().eq_ignore_ascii_case("text")
|
||||
&& ct
|
||||
.subtype()
|
||||
.unwrap_or_default()
|
||||
.eq_ignore_ascii_case("html")
|
||||
}) {
|
||||
ctx.result.add_tag("MIME_HTML_ONLY");
|
||||
}
|
||||
let mut last_href: Option<Href> = None;
|
||||
let mut html_img_words = 0;
|
||||
let mut html_text_chars = 0;
|
||||
let mut in_head: i32 = 0;
|
||||
let mut in_body: i32 = 0;
|
||||
|
||||
for (part_id, part) in ctx.output.text_parts.iter().enumerate() {
|
||||
let is_body_part = ctx.input.message.text_body.contains(&part_id)
|
||||
|| ctx.input.message.html_body.contains(&part_id);
|
||||
|
||||
let (html_tokens, tokens) = if let TextPart::Html {
|
||||
html_tokens,
|
||||
tokens,
|
||||
..
|
||||
} = part
|
||||
{
|
||||
(html_tokens, tokens)
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
let mut has_link_to_img = false;
|
||||
|
||||
for token in html_tokens {
|
||||
match token {
|
||||
HtmlToken::StartTag {
|
||||
name,
|
||||
attributes,
|
||||
is_self_closing,
|
||||
} => match *name {
|
||||
A => {
|
||||
if let Some(attr) = attributes.iter().find_map(|(attr, value)| {
|
||||
if *attr == HREF {
|
||||
value.as_deref()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
let url = attr.trim().to_lowercase();
|
||||
let url_parsed = url.parse::<Uri>().ok();
|
||||
let href = Href {
|
||||
host: url_parsed
|
||||
.as_ref()
|
||||
.and_then(|uri| uri.host().map(Hostname::new)),
|
||||
url_parsed,
|
||||
};
|
||||
|
||||
if is_body_part
|
||||
&& attr.starts_with("data:")
|
||||
&& attr.contains(";base64,")
|
||||
{
|
||||
// Has Data URI encoding
|
||||
ctx.result.add_tag("HAS_DATA_URI");
|
||||
if attr.contains("text/") {
|
||||
// Uses Data URI encoding to obfuscate plain or HTML in base64
|
||||
ctx.result.add_tag("DATA_URI_OBFU");
|
||||
}
|
||||
} else if href.host.as_ref().map_or(false, |h| h.ip.is_some()) {
|
||||
// HTML anchor points to an IP address
|
||||
ctx.result.add_tag("HTTP_TO_IP");
|
||||
}
|
||||
|
||||
if !*is_self_closing {
|
||||
last_href = Some(href);
|
||||
}
|
||||
}
|
||||
}
|
||||
IMG if is_body_part => {
|
||||
let mut img_width = 800;
|
||||
let mut img_height = 600;
|
||||
|
||||
for (attr, value) in attributes {
|
||||
if let Some(value) =
|
||||
value.as_deref().map(|v| v.trim()).filter(|v| !v.is_empty())
|
||||
{
|
||||
let dimension = match *attr {
|
||||
WIDTH => &mut img_width,
|
||||
HEIGHT => &mut img_height,
|
||||
SRC => {
|
||||
let src = value.to_ascii_lowercase();
|
||||
if src.starts_with("data:") && src.contains(";base64,")
|
||||
{
|
||||
// Has Data URI encoding
|
||||
ctx.result.add_tag("Has Data URI encoding");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Some(pct) = value.strip_suffix('%') {
|
||||
if let Ok(pct) = pct.trim().parse::<u64>() {
|
||||
*dimension = (*dimension * pct) / 100;
|
||||
}
|
||||
} else if let Ok(value) = value.parse::<u64>() {
|
||||
*dimension = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
let dimensions = img_width + img_height;
|
||||
|
||||
if last_href.is_some() && dimensions >= 210 {
|
||||
has_link_to_img = true;
|
||||
}
|
||||
|
||||
if dimensions > 100 {
|
||||
// We assume that a single picture 100x200 contains approx 3 words of text
|
||||
html_img_words += dimensions / 100;
|
||||
}
|
||||
}
|
||||
META => {
|
||||
let mut has_equiv_refresh = false;
|
||||
let mut has_content_url = false;
|
||||
|
||||
for (attr, value) in attributes {
|
||||
if let Some(value) =
|
||||
value.as_deref().map(|v| v.trim()).filter(|v| !v.is_empty())
|
||||
{
|
||||
if *attr == HTTP_EQUIV {
|
||||
if value.eq_ignore_ascii_case("refresh") {
|
||||
has_equiv_refresh = true;
|
||||
}
|
||||
} else if *attr == CONTENT
|
||||
&& value.to_ascii_lowercase().contains("url=")
|
||||
{
|
||||
has_content_url = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if has_equiv_refresh && has_content_url {
|
||||
// HTML meta refresh tag
|
||||
ctx.result.add_tag("HTML_META_REFRESH_URL");
|
||||
}
|
||||
}
|
||||
LINK if is_body_part => {
|
||||
let mut has_rel_style = false;
|
||||
let mut has_href_css = false;
|
||||
|
||||
for (attr, value) in attributes {
|
||||
if let Some(value) =
|
||||
value.as_deref().map(|v| v.trim()).filter(|v| !v.is_empty())
|
||||
{
|
||||
if *attr == REL {
|
||||
if value.to_ascii_lowercase().contains("stylesheet") {
|
||||
has_rel_style = true;
|
||||
}
|
||||
} else if *attr == HREF
|
||||
&& value.to_ascii_lowercase().ends_with(".css")
|
||||
{
|
||||
has_href_css = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if has_rel_style || has_href_css {
|
||||
// Has external CSS
|
||||
ctx.result.add_tag("EXT_CSS");
|
||||
}
|
||||
}
|
||||
HEAD if !*is_self_closing => {
|
||||
in_head += 1;
|
||||
}
|
||||
BODY if !*is_self_closing => {
|
||||
in_body += 1;
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
HtmlToken::EndTag { name } => match *name {
|
||||
A => {
|
||||
last_href = None;
|
||||
}
|
||||
HEAD => {
|
||||
in_head -= 1;
|
||||
}
|
||||
BODY => {
|
||||
in_body -= 1;
|
||||
}
|
||||
_ => (),
|
||||
},
|
||||
HtmlToken::Text { text } if in_head > 0 => {
|
||||
if let Some((href_url, href_host)) = last_href
|
||||
.as_ref()
|
||||
.and_then(|href| Some((href.url_parsed.as_ref()?, href.host.as_ref()?)))
|
||||
{
|
||||
for token in TypesTokenizer::new(text.as_ref())
|
||||
.tokenize_numbers(false)
|
||||
.tokenize_urls(true)
|
||||
.tokenize_urls_without_scheme(true)
|
||||
.tokenize_emails(true)
|
||||
{
|
||||
let text_url = match token.word {
|
||||
TokenType::Url(url) => url.to_lowercase(),
|
||||
TokenType::UrlNoScheme(url) => {
|
||||
format!("http://{}", url.to_lowercase())
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
let text_url_parsed =
|
||||
if let Ok(text_url_parsed) = text_url.parse::<Uri>() {
|
||||
text_url_parsed
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if href_url.scheme().map(|s| s.as_str()).unwrap_or_default()
|
||||
== "http"
|
||||
&& text_url_parsed
|
||||
.scheme()
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or_default()
|
||||
== "https"
|
||||
{
|
||||
// The anchor text contains a distinct scheme compared to the target URL
|
||||
ctx.result.add_tag("HTTP_TO_HTTPS");
|
||||
}
|
||||
|
||||
if let Some(text_url_host) = text_url_parsed.host() {
|
||||
let text_url_host = Hostname::new(text_url_host);
|
||||
|
||||
if text_url_host.sld_or_default() != href_host.sld_or_default()
|
||||
{
|
||||
// The anchor text contains a different domain than the target URL
|
||||
ctx.result.add_tag("PHISHING");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if is_body_part {
|
||||
html_text_chars += text.chars().filter(|t| t.is_alphanumeric()).count();
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if is_body_part {
|
||||
if in_head != 0 || in_body != 0 {
|
||||
// HTML tags are not properly closed
|
||||
ctx.result.add_tag("HTML_UNBALANCED_TAG");
|
||||
}
|
||||
|
||||
if has_link_to_img {
|
||||
match html_text_chars {
|
||||
0..1024 => {
|
||||
ctx.result.add_tag("HTML_SHORT_LINK_IMG_1");
|
||||
}
|
||||
1024..1536 => {
|
||||
ctx.result.add_tag("HTML_SHORT_LINK_IMG_2");
|
||||
}
|
||||
1536..2048 => {
|
||||
ctx.result.add_tag("HTML_SHORT_LINK_IMG_3");
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
let mut html_words = 0;
|
||||
let mut html_uris = 0;
|
||||
|
||||
for token in tokens {
|
||||
match token {
|
||||
TokenType::Alphabetic(_)
|
||||
| TokenType::Alphanumeric(_)
|
||||
| TokenType::Email(_) => {
|
||||
html_words += 1;
|
||||
}
|
||||
TokenType::Url(_) | TokenType::UrlNoScheme(_) => {
|
||||
html_uris += 1;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_link_to_img || html_text_chars >= 2048)
|
||||
&& html_img_words as f64 / (html_words as f64 + html_img_words as f64) > 0.5
|
||||
{
|
||||
// Message contains more images than text
|
||||
ctx.result.add_tag("HTML_TEXT_IMG_RATIO");
|
||||
}
|
||||
|
||||
if html_uris > 0 && html_words == 0 {
|
||||
// Message only contains URIs in HTML
|
||||
ctx.result.add_tag("BODY_URI_ONLY");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -13,14 +13,14 @@ use crate::{modules::dnsbl::is_dnsbl, SpamFilterContext, TextPart};
|
|||
|
||||
use super::{ElementLocation, SpamFilterResolver};
|
||||
|
||||
pub trait SpamFilterAnalyzeIpRev: Sync + Send {
|
||||
pub trait SpamFilterAnalyzeIp: Sync + Send {
|
||||
fn spam_filter_analyze_ip(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeIpRev for Server {
|
||||
impl SpamFilterAnalyzeIp for Server {
|
||||
async fn spam_filter_analyze_ip(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
// IP Address RBL
|
||||
let mut ips =
|
||||
|
|
426
crates/spam-filter/src/analysis/mime.rs
Normal file
426
crates/spam-filter/src/analysis/mime.rs
Normal file
|
@ -0,0 +1,426 @@
|
|||
use std::{collections::HashSet, future::Future, vec};
|
||||
|
||||
use common::{
|
||||
scripts::functions::{array::cosine_similarity, unicode::CharUtils},
|
||||
Server,
|
||||
};
|
||||
use hyper::Uri;
|
||||
use mail_parser::{HeaderName, MimeHeaders, PartType};
|
||||
use nlp::tokenizers::types::TokenType;
|
||||
use unicode_security::MixedScript;
|
||||
|
||||
use crate::{Hostname, SpamFilterContext, TextPart};
|
||||
|
||||
pub trait SpamFilterAnalyzeMime: Sync + Send {
|
||||
fn spam_filter_analyze_mime(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzeMime for Server {
|
||||
async fn spam_filter_analyze_mime(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
let mut has_mime_version = false;
|
||||
let mut has_ct = false;
|
||||
let mut has_cte = false;
|
||||
let mut had_cd = false;
|
||||
let mut is_plain_text = false;
|
||||
|
||||
for header in ctx.input.message.headers() {
|
||||
match &header.name {
|
||||
HeaderName::MimeVersion => {
|
||||
if ctx
|
||||
.input
|
||||
.message
|
||||
.raw_message()
|
||||
.get(header.offset_field..header.offset_start - 1)
|
||||
!= Some(b"MIME-Version")
|
||||
{
|
||||
ctx.result.add_tag("MV_CASE");
|
||||
}
|
||||
has_mime_version = true;
|
||||
}
|
||||
HeaderName::ContentType => {
|
||||
has_ct = true;
|
||||
is_plain_text = header.value().as_content_type().map_or(false, |ct| {
|
||||
ct.ctype().eq_ignore_ascii_case("text")
|
||||
&& ct
|
||||
.subtype()
|
||||
.unwrap_or_default()
|
||||
.eq_ignore_ascii_case("plain")
|
||||
});
|
||||
}
|
||||
HeaderName::ContentTransferEncoding => {
|
||||
has_cte = true;
|
||||
}
|
||||
HeaderName::ContentDisposition => {
|
||||
had_cd = true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if !has_mime_version && (has_ct || has_cte) {
|
||||
ctx.result.add_tag("MISSING_MIME_VERSION");
|
||||
}
|
||||
if has_ct && !is_plain_text && !has_cte && !had_cd {
|
||||
// Only Content-Type header without other MIME headers
|
||||
ctx.result.add_tag("MIME_HEADER_CTYPE_ONLY");
|
||||
}
|
||||
let raw_message = ctx.input.message.raw_message();
|
||||
|
||||
let mut has_text_part = false;
|
||||
let mut is_encrypted = false;
|
||||
let mut is_encrypted_smime = false;
|
||||
let mut is_encrypted_pgp = false;
|
||||
|
||||
let mut num_parts = 0;
|
||||
let mut num_parts_size = 0;
|
||||
|
||||
for (part_id, part) in ctx.input.message.parts.iter().enumerate() {
|
||||
let mut ct = None;
|
||||
let mut cd = None;
|
||||
let mut ct_type = String::new();
|
||||
let mut ct_subtype = String::new();
|
||||
let mut cte = String::new();
|
||||
let mut is_attachment = ctx.input.message.attachments.contains(&part_id);
|
||||
let mut has_content_id = false;
|
||||
|
||||
for header in part.headers() {
|
||||
match &header.name {
|
||||
HeaderName::ContentType => {
|
||||
if let Some(ct_) = header.value().as_content_type() {
|
||||
ct_type = ct_.ctype().to_ascii_lowercase();
|
||||
ct_subtype = ct_.subtype().unwrap_or_default().to_ascii_lowercase();
|
||||
ct = Some(ct_);
|
||||
}
|
||||
|
||||
if ct_type.is_empty() {
|
||||
// Content-Type header can't be parsed
|
||||
ctx.result.add_tag("BROKEN_CONTENT_TYPE");
|
||||
}
|
||||
|
||||
if raw_message
|
||||
.get(header.offset_start..header.offset_end)
|
||||
.and_then(|s| s.trim_ascii_end().last())
|
||||
== Some(&b';')
|
||||
{
|
||||
// Content-Type header ends with a semi-colon
|
||||
ctx.result.add_tag("CT_EXTRA_SEMI");
|
||||
}
|
||||
}
|
||||
HeaderName::ContentTransferEncoding => {
|
||||
let cte_ = header.value().as_text().unwrap_or_default();
|
||||
cte = cte_.to_ascii_lowercase();
|
||||
|
||||
if cte != cte_ {
|
||||
ctx.result.add_tag("CTE_CASE");
|
||||
}
|
||||
}
|
||||
HeaderName::ContentDisposition => {
|
||||
cd = header.value().as_content_type();
|
||||
}
|
||||
HeaderName::ContentId => {
|
||||
has_content_id = true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
match ct_type.as_str() {
|
||||
"multipart" => {
|
||||
let part_ids = match &part.body {
|
||||
PartType::Multipart(parts) => parts.as_slice(),
|
||||
_ => &[],
|
||||
};
|
||||
|
||||
match ct_subtype.as_str() {
|
||||
"alternative" => {
|
||||
let mut has_plain_part = false;
|
||||
let mut has_html_part = false;
|
||||
|
||||
let mut text_part_words = vec![];
|
||||
let mut text_part_uris = 0;
|
||||
|
||||
let mut html_part_words = vec![];
|
||||
let mut html_part_uris = 0;
|
||||
|
||||
for text_part in part_ids.iter().map(|id| &ctx.output.text_parts[*id]) {
|
||||
match text_part {
|
||||
TextPart::Plain { tokens, .. } if !has_plain_part => {
|
||||
words_and_uris(
|
||||
tokens,
|
||||
&mut text_part_words,
|
||||
&mut text_part_uris,
|
||||
);
|
||||
has_plain_part = true;
|
||||
}
|
||||
TextPart::Html { tokens, .. } if !has_html_part => {
|
||||
words_and_uris(
|
||||
tokens,
|
||||
&mut html_part_words,
|
||||
&mut html_part_uris,
|
||||
);
|
||||
has_html_part = true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
// Multipart message mostly text/html MIME
|
||||
if has_html_part {
|
||||
if !has_plain_part {
|
||||
ctx.result.add_tag("MIME_MA_MISSING_TEXT");
|
||||
}
|
||||
} else if has_plain_part {
|
||||
ctx.result.add_tag("MIME_MA_MISSING_HTML");
|
||||
}
|
||||
|
||||
// HTML and text parts are different
|
||||
if has_plain_part
|
||||
&& has_html_part
|
||||
&& (!text_part_words.is_empty() || !html_part_words.is_empty())
|
||||
&& cosine_similarity(&text_part_words, &html_part_words) < 0.95
|
||||
{
|
||||
ctx.result.add_tag("R_PARTS_DIFFER");
|
||||
}
|
||||
|
||||
// Odd URI count between parts
|
||||
if text_part_uris != html_part_uris {
|
||||
ctx.result.add_tag("URI_COUNT_ODD");
|
||||
}
|
||||
}
|
||||
"mixed" => {
|
||||
let mut num_text_parts = 0;
|
||||
let mut has_other_parts = false;
|
||||
|
||||
for (sub_part_id, sub_part) in part_ids
|
||||
.iter()
|
||||
.map(|id| (*id, &ctx.input.message.parts[*id]))
|
||||
{
|
||||
let ctype = sub_part
|
||||
.content_type()
|
||||
.map(|ct| ct.ctype())
|
||||
.unwrap_or_default();
|
||||
|
||||
if ctype.eq_ignore_ascii_case("text")
|
||||
&& !ctx.input.message.attachments.contains(&sub_part_id)
|
||||
{
|
||||
num_text_parts += 1;
|
||||
} else if !ctype.eq_ignore_ascii_case("multipart") {
|
||||
has_other_parts = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Found multipart/mixed without non-textual part
|
||||
if !has_other_parts && num_text_parts < 3 {
|
||||
ctx.result.add_tag("CTYPE_MIXED_BOGUS");
|
||||
}
|
||||
}
|
||||
"encrypted" => {
|
||||
is_encrypted = true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
"text" => {
|
||||
let mut is_7bit = false;
|
||||
match cte.as_str() {
|
||||
"" | "7bit" => {
|
||||
if raw_message
|
||||
.get(part.raw_body_offset()..part.raw_end_offset())
|
||||
.map_or(false, |bytes| !bytes.is_ascii())
|
||||
{
|
||||
// MIME text part claims to be ASCII but isn't
|
||||
ctx.result.add_tag("R_BAD_CTE_7BIT");
|
||||
}
|
||||
is_7bit = true;
|
||||
}
|
||||
"base64" => {
|
||||
if part.contents().is_ascii() {
|
||||
// Has text part encoded in base64 that does not contain any 8bit characters
|
||||
ctx.result.add_tag("MIME_BASE64_TEXT_BOGUS");
|
||||
} else {
|
||||
// Has text part encoded in base64
|
||||
ctx.result.add_tag("MIME_BASE64_TEXT");
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if !is_7bit
|
||||
&& ct_subtype == "plain"
|
||||
&& ct
|
||||
.and_then(|ct| ct.attribute("charset"))
|
||||
.map_or(true, |c| c.is_empty())
|
||||
{
|
||||
// Charset header is missing
|
||||
ctx.result.add_tag("R_MISSING_CHARSET");
|
||||
}
|
||||
|
||||
match &part.body {
|
||||
PartType::Text(text) | PartType::Html(text)
|
||||
if ctx.input.message.text_body.contains(&part_id)
|
||||
|| ctx.input.message.html_body.contains(&part_id) =>
|
||||
{
|
||||
if !text.as_ref().is_single_script() {
|
||||
// Text part contains multiple scripts
|
||||
ctx.result.add_tag("R_MIXED_CHARSET");
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
has_text_part = true;
|
||||
}
|
||||
"application" => match ct_subtype.as_str() {
|
||||
"pkcs7-mime" => {
|
||||
ctx.result.add_tag("ENCRYPTED_SMIME");
|
||||
is_attachment = false;
|
||||
is_encrypted_smime = true;
|
||||
}
|
||||
"pkcs7-signature" => {
|
||||
ctx.result.add_tag("SIGNED_SMIME");
|
||||
is_attachment = false;
|
||||
}
|
||||
"pgp-encrypted" => {
|
||||
ctx.result.add_tag("ENCRYPTED_PGP");
|
||||
is_attachment = false;
|
||||
is_encrypted_pgp = true;
|
||||
}
|
||||
"pgp-signature" => {
|
||||
ctx.result.add_tag("SIGNED_PGP");
|
||||
is_attachment = false;
|
||||
}
|
||||
"octet-stream" => {
|
||||
if !is_encrypted
|
||||
&& !has_content_id
|
||||
&& cd.map_or(true, |cd| {
|
||||
cd.attribute("type")
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase()
|
||||
!= "attachment"
|
||||
&& !cd.has_attribute("filename")
|
||||
})
|
||||
{
|
||||
ctx.result.add_tag("CTYPE_MISSING_DISPOSITION");
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
num_parts += 1;
|
||||
num_parts_size += part.len();
|
||||
|
||||
let ct_full = format!("{ct_type}/{ct_subtype}");
|
||||
|
||||
if is_attachment {
|
||||
// Has a MIME attachment
|
||||
ctx.result.add_tag("HAS_ATTACHMENT");
|
||||
match &part.body {
|
||||
PartType::Binary(bytes) | PartType::InlineBinary(bytes) => {
|
||||
if let Some(t) = infer::get(bytes.as_ref()) {
|
||||
if t.mime_type() != ct_full {
|
||||
// Known content-type
|
||||
ctx.result.add_tag("MIME_GOOD");
|
||||
} else if ct_full != "application/octet-stream" {
|
||||
// Known bad content-type
|
||||
ctx.result.add_tag("MIME_BAD");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
// Analyze attachment name
|
||||
if let Some(attach_name) = part.attachment_name() {
|
||||
if attach_name.chars().any(|c| c.is_obscured()) {
|
||||
// Attachment name contains zero-width space
|
||||
ctx.result.add_tag("MIME_BAD_UNICODE");
|
||||
}
|
||||
let attach_name = attach_name.trim().to_lowercase();
|
||||
if let Some((name, ext)) = attach_name.rsplit_once('.').and_then(|(name, ext)| {
|
||||
Some((name, self.core.spam.list_file_extensions.get(ext)?))
|
||||
}) {
|
||||
let sub_ext = name
|
||||
.rsplit_once('.')
|
||||
.and_then(|(_, ext)| self.core.spam.list_file_extensions.get(ext));
|
||||
|
||||
if ext.is_bad {
|
||||
// Attachment has a bad extension
|
||||
if sub_ext.map_or(false, |e| e.is_bad) {
|
||||
ctx.result.add_tag("MIME_DOUBLE_BAD_EXTENSION");
|
||||
} else {
|
||||
ctx.result.add_tag("MIME_BAD_EXTENSION");
|
||||
}
|
||||
}
|
||||
|
||||
if ext.is_archive && sub_ext.map_or(false, |e| e.is_archive) {
|
||||
// Archive in archive
|
||||
ctx.result.add_tag("MIME_ARCHIVE_IN_ARCHIVE");
|
||||
}
|
||||
|
||||
if !ext.known_types.is_empty()
|
||||
&& ct_full != "application/octet-stream"
|
||||
&& !ext.known_types.contains(&ct_full)
|
||||
{
|
||||
// Invalid attachment mime type
|
||||
ctx.result.add_tag("MIME_BAD_ATTACHMENT");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match num_parts_size {
|
||||
0 => {
|
||||
// Message contains no parts
|
||||
ctx.result.add_tag("COMPLETELY_EMPTY");
|
||||
}
|
||||
1..64 if num_parts == 1 => {
|
||||
// Message contains only one short part
|
||||
ctx.result.add_tag("SINGLE_SHORT_PART");
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if has_text_part && (is_encrypted_pgp || is_encrypted_smime) {
|
||||
// Message contains both text and encrypted parts
|
||||
ctx.result.add_tag("BOGUS_ENCRYPTED_AND_TEXT");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn words_and_uris<'x, T: AsRef<str>>(
|
||||
tokens: &'x [TokenType<T>],
|
||||
words: &mut Vec<&'x str>,
|
||||
uri_count: &mut usize,
|
||||
) {
|
||||
let mut uris = HashSet::new();
|
||||
|
||||
for token in tokens {
|
||||
match token {
|
||||
TokenType::Alphabetic(v) | TokenType::Alphanumeric(v) => {
|
||||
words.push(v.as_ref());
|
||||
}
|
||||
TokenType::Url(v) => {
|
||||
if let Some(host) = v
|
||||
.as_ref()
|
||||
.parse::<Uri>()
|
||||
.ok()
|
||||
.and_then(|uri| uri.host().map(Hostname::new))
|
||||
{
|
||||
uris.insert(host.sld.unwrap_or(host.fqdn));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
*uri_count = uris.len();
|
||||
}
|
|
@ -19,9 +19,12 @@ pub mod domain;
|
|||
pub mod ehlo;
|
||||
pub mod from;
|
||||
pub mod headers;
|
||||
pub mod html;
|
||||
pub mod init;
|
||||
pub mod ip;
|
||||
pub mod messageid;
|
||||
pub mod mime;
|
||||
pub mod pyzor;
|
||||
pub mod received;
|
||||
pub mod recipient;
|
||||
pub mod replyto;
|
||||
|
|
35
crates/spam-filter/src/analysis/pyzor.rs
Normal file
35
crates/spam-filter/src/analysis/pyzor.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
use std::future::Future;
|
||||
|
||||
use common::Server;
|
||||
|
||||
use crate::{modules::pyzor::pyzor_check, SpamFilterContext};
|
||||
|
||||
pub trait SpamFilterAnalyzePyzor: Sync + Send {
|
||||
fn spam_filter_analyze_pyzor(
|
||||
&self,
|
||||
ctx: &mut SpamFilterContext<'_>,
|
||||
) -> impl Future<Output = ()> + Send;
|
||||
}
|
||||
|
||||
impl SpamFilterAnalyzePyzor for Server {
|
||||
async fn spam_filter_analyze_pyzor(&self, ctx: &mut SpamFilterContext<'_>) {
|
||||
if let Some(config) = &self.core.spam.pyzor {
|
||||
match pyzor_check(ctx.input.message, config).await {
|
||||
Ok(Some(result)) => {
|
||||
if result.code == 200
|
||||
&& result.count > config.min_count
|
||||
&& (result.wl_count < config.min_wl_count
|
||||
|| (result.wl_count as f64 / result.count as f64) < config.ratio)
|
||||
{
|
||||
ctx.result.add_tag("PYZOR");
|
||||
}
|
||||
let todo = "log time";
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(err) => {
|
||||
trc::error!(err.span_id(ctx.input.span_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -438,7 +438,7 @@ fn is_single_html_url<T: AsRef<str>>(html_tokens: &[HtmlToken], tokens: &[TokenT
|
|||
url_count = 0;
|
||||
|
||||
for token in html_tokens {
|
||||
if matches!(token, HtmlToken::StartTag { name, attributes } if *name == A && attributes.iter().any(|(k, _)| *k == HREF))
|
||||
if matches!(token, HtmlToken::StartTag { name, attributes, .. } if *name == A && attributes.iter().any(|(k, _)| *k == HREF))
|
||||
{
|
||||
url_count += 1;
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ pub enum HtmlToken {
|
|||
StartTag {
|
||||
name: u64,
|
||||
attributes: Vec<(u64, Option<String>)>,
|
||||
is_self_closing: bool,
|
||||
},
|
||||
EndTag {
|
||||
name: u64,
|
||||
|
@ -18,10 +19,46 @@ pub enum HtmlToken {
|
|||
}
|
||||
|
||||
pub(crate) const A: u64 = b'a' as u64;
|
||||
pub(crate) const IMG: u64 = (b'i' as u64) | (b'm' as u64) << 8 | (b'g' as u64) << 16;
|
||||
pub(crate) const HEAD: u64 =
|
||||
(b'h' as u64) | (b'e' as u64) << 8 | (b'a' as u64) << 16 | (b'd' as u64) << 24;
|
||||
pub(crate) const BODY: u64 =
|
||||
(b'b' as u64) | (b'o' as u64) << 8 | (b'd' as u64) << 16 | (b'y' as u64) << 24;
|
||||
pub(crate) const META: u64 =
|
||||
(b'm' as u64) | (b'e' as u64) << 8 | (b't' as u64) << 16 | (b'a' as u64) << 24;
|
||||
pub(crate) const LINK: u64 =
|
||||
(b'l' as u64) | (b'i' as u64) << 8 | (b'n' as u64) << 16 | (b'k' as u64) << 24;
|
||||
|
||||
pub(crate) const HREF: u64 =
|
||||
(b'h' as u64) | (b'r' as u64) << 8 | (b'e' as u64) << 16 | (b'f' as u64) << 24;
|
||||
pub(crate) const SRC: u64 = (b's' as u64) | (b'r' as u64) << 8 | (b'c' as u64) << 16;
|
||||
pub(crate) const WIDTH: u64 = (b'w' as u64)
|
||||
| (b'i' as u64) << 8
|
||||
| (b'd' as u64) << 16
|
||||
| (b't' as u64) << 24
|
||||
| (b'h' as u64) << 32;
|
||||
pub(crate) const HEIGHT: u64 = (b'h' as u64)
|
||||
| (b'e' as u64) << 8
|
||||
| (b'i' as u64) << 16
|
||||
| (b'g' as u64) << 24
|
||||
| (b'h' as u64) << 32
|
||||
| (b't' as u64) << 40;
|
||||
pub(crate) const REL: u64 = (b'r' as u64) | (b'e' as u64) << 8 | (b'l' as u64) << 16;
|
||||
pub(crate) const CONTENT: u64 = (b'c' as u64)
|
||||
| (b'o' as u64) << 8
|
||||
| (b'n' as u64) << 16
|
||||
| (b't' as u64) << 24
|
||||
| (b'e' as u64) << 32
|
||||
| (b'n' as u64) << 40
|
||||
| (b't' as u64) << 48;
|
||||
pub(crate) const HTTP_EQUIV: u64 = (b'h' as u64)
|
||||
| (b't' as u64) << 8
|
||||
| (b't' as u64) << 16
|
||||
| (b'p' as u64) << 24
|
||||
| (b'-' as u64) << 32
|
||||
| (b'e' as u64) << 40
|
||||
| (b'q' as u64) << 48
|
||||
| (b'u' as u64) << 56;
|
||||
|
||||
pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
|
||||
let input = input.as_bytes();
|
||||
|
@ -106,6 +143,7 @@ pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
|
|||
}
|
||||
|
||||
let mut in_quote = false;
|
||||
let mut is_self_closing = false;
|
||||
|
||||
let mut key: u64 = 0;
|
||||
let mut shift = 0;
|
||||
|
@ -123,6 +161,9 @@ pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
|
|||
key |= ((ch - b'A' + b'a') as u64) << shift;
|
||||
shift += 8;
|
||||
}
|
||||
b'/' if !in_quote => {
|
||||
is_self_closing = true;
|
||||
}
|
||||
b'>' if !in_quote => {
|
||||
if shift != 0 {
|
||||
if tag == 0 {
|
||||
|
@ -205,6 +246,7 @@ pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
|
|||
tags.push(HtmlToken::StartTag {
|
||||
name: tag,
|
||||
attributes,
|
||||
is_self_closing,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -292,7 +334,8 @@ mod tests {
|
|||
tokens,
|
||||
vec![HtmlToken::StartTag {
|
||||
name: 7760228,
|
||||
attributes: vec![]
|
||||
attributes: vec![],
|
||||
is_self_closing: false
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
@ -325,14 +368,16 @@ mod tests {
|
|||
vec![
|
||||
HtmlToken::StartTag {
|
||||
name: 7760228,
|
||||
attributes: vec![]
|
||||
attributes: vec![],
|
||||
is_self_closing: false
|
||||
},
|
||||
HtmlToken::Text {
|
||||
text: "Hello,".to_string()
|
||||
},
|
||||
HtmlToken::StartTag {
|
||||
name: 1851879539,
|
||||
attributes: vec![]
|
||||
attributes: vec![],
|
||||
is_self_closing: false
|
||||
},
|
||||
HtmlToken::Text {
|
||||
text: " \" world \"".to_string()
|
||||
|
@ -358,15 +403,18 @@ mod tests {
|
|||
attributes: vec![
|
||||
(1701869940, Some("text".to_string())),
|
||||
(435761734006, Some("test".to_string()))
|
||||
]
|
||||
],
|
||||
is_self_closing: false
|
||||
},
|
||||
HtmlToken::StartTag {
|
||||
name: 111516266162547,
|
||||
attributes: vec![]
|
||||
attributes: vec![],
|
||||
is_self_closing: true
|
||||
},
|
||||
HtmlToken::StartTag {
|
||||
name: 6647407,
|
||||
attributes: vec![(1920234593, None)]
|
||||
attributes: vec![(1920234593, None)],
|
||||
is_self_closing: true
|
||||
},
|
||||
HtmlToken::StartTag {
|
||||
name: 97,
|
||||
|
@ -374,7 +422,8 @@ mod tests {
|
|||
(98, Some("1".to_string())),
|
||||
(98, None),
|
||||
(99, Some("123".to_string()))
|
||||
]
|
||||
],
|
||||
is_self_closing: false
|
||||
}
|
||||
]
|
||||
);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pub mod dnsbl;
|
||||
pub mod html;
|
||||
pub mod pyzor;
|
||||
pub mod remote_list;
|
||||
pub mod sanitize;
|
||||
|
|
|
@ -4,16 +4,14 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
|
||||
*/
|
||||
|
||||
use sieve::{runtime::Variable, FunctionMap};
|
||||
|
||||
use super::PluginContext;
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
io::Write,
|
||||
net::SocketAddr,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use common::config::spamfilter::PyzorConfig;
|
||||
use mail_parser::{decoders::html::add_html_token, Message, PartType};
|
||||
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
|
||||
use sha1::{Digest, Sha1};
|
||||
|
@ -24,29 +22,27 @@ const ATOMIC_NUM_LINES: usize = 4;
|
|||
const DIGEST_SPEC: &[(usize, usize)] = &[(20, 3), (60, 3)];
|
||||
|
||||
#[derive(Default, Debug, PartialEq, Eq)]
|
||||
struct PyzorResponse {
|
||||
code: u32,
|
||||
count: u64,
|
||||
wl_count: u64,
|
||||
pub(crate) struct PyzorResponse {
|
||||
pub code: u32,
|
||||
pub count: u64,
|
||||
pub wl_count: u64,
|
||||
}
|
||||
|
||||
pub fn register(plugin_id: u32, fnc_map: &mut FunctionMap) {
|
||||
fnc_map.set_external_function("pyzor_check", plugin_id, 2);
|
||||
}
|
||||
|
||||
pub async fn exec(ctx: PluginContext<'_>) -> trc::Result<Variable> {
|
||||
pub(crate) async fn pyzor_check(
|
||||
message: &Message<'_>,
|
||||
config: &PyzorConfig,
|
||||
) -> trc::Result<Option<PyzorResponse>> {
|
||||
// Make sure there is at least one text part
|
||||
if !ctx
|
||||
.message
|
||||
if !message
|
||||
.parts
|
||||
.iter()
|
||||
.any(|p| matches!(p.body, PartType::Text(_) | PartType::Html(_)))
|
||||
{
|
||||
return Ok(Variable::default());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Hash message
|
||||
let request = ctx.message.pyzor_check_message();
|
||||
let request = message.pyzor_check_message();
|
||||
|
||||
#[cfg(feature = "test_mode")]
|
||||
{
|
||||
|
@ -74,35 +70,21 @@ pub async fn exec(ctx: PluginContext<'_>) -> trc::Result<Variable> {
|
|||
}
|
||||
}
|
||||
|
||||
let address = ctx.arguments[0].to_string();
|
||||
let timeout = Duration::from_secs((ctx.arguments[1].to_integer() as u64).clamp(5, 60));
|
||||
|
||||
// Send message to address
|
||||
pyzor_send_message(address.as_ref(), timeout, &request)
|
||||
pyzor_send_message(config.address, config.timeout, &request)
|
||||
.await
|
||||
.map(Into::into)
|
||||
.map_err(|err| {
|
||||
trc::SpamEvent::PyzorError
|
||||
.into_err()
|
||||
.ctx(trc::Key::Url, address.to_string())
|
||||
.ctx(trc::Key::Url, config.address.to_string())
|
||||
.reason(err)
|
||||
.details("Pyzor failed")
|
||||
})
|
||||
}
|
||||
|
||||
impl From<PyzorResponse> for Variable {
|
||||
fn from(response: PyzorResponse) -> Self {
|
||||
vec![
|
||||
Variable::from(response.code),
|
||||
Variable::from(response.count),
|
||||
Variable::from(response.wl_count),
|
||||
]
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
async fn pyzor_send_message(
|
||||
addr: &str,
|
||||
addr: SocketAddr,
|
||||
timeout: Duration,
|
||||
message: &str,
|
||||
) -> std::io::Result<PyzorResponse> {
|
||||
|
@ -451,7 +433,7 @@ mod test {
|
|||
async fn send_message() {
|
||||
assert_eq!(
|
||||
pyzor_send_message(
|
||||
"public.pyzor.org:24441",
|
||||
"public.pyzor.org:24441".parse().unwrap(),
|
||||
Duration::from_secs(10),
|
||||
concat!(
|
||||
"Op: check\n",
|
||||
|
|
|
@ -442,6 +442,7 @@ impl SmtpEvent {
|
|||
SmtpEvent::RcptToDuplicate => "Duplicate RCPT TO",
|
||||
SmtpEvent::RcptToRewritten => "RCPT TO address rewritten",
|
||||
SmtpEvent::RcptToMissing => "RCPT TO address missing",
|
||||
SmtpEvent::RcptToGreylisted => "RCPT TO greylisted",
|
||||
SmtpEvent::TooManyRecipients => "Too many recipients",
|
||||
SmtpEvent::TooManyInvalidRcpt => "Too many invalid recipients",
|
||||
SmtpEvent::RawInput => "Raw SMTP input received",
|
||||
|
@ -552,6 +553,7 @@ impl SmtpEvent {
|
|||
}
|
||||
SmtpEvent::RcptToRewritten => "The envelope recipient address was rewritten",
|
||||
SmtpEvent::RcptToMissing => "The remote client issued a DATA command before RCPT TO",
|
||||
SmtpEvent::RcptToGreylisted => "The recipient was greylisted",
|
||||
SmtpEvent::TooManyRecipients => {
|
||||
"The remote client exceeded the number of recipients allowed"
|
||||
}
|
||||
|
|
|
@ -186,6 +186,7 @@ impl EventType {
|
|||
| SmtpEvent::MailboxDoesNotExist
|
||||
| SmtpEvent::RelayNotAllowed
|
||||
| SmtpEvent::RcptTo
|
||||
| SmtpEvent::RcptToGreylisted
|
||||
| SmtpEvent::TooManyInvalidRcpt
|
||||
| SmtpEvent::Vrfy
|
||||
| SmtpEvent::VrfyNotFound
|
||||
|
|
|
@ -393,6 +393,7 @@ pub enum SmtpEvent {
|
|||
RcptToDuplicate,
|
||||
RcptToRewritten,
|
||||
RcptToMissing,
|
||||
RcptToGreylisted,
|
||||
TooManyRecipients,
|
||||
TooManyInvalidRcpt,
|
||||
RawInput,
|
||||
|
|
|
@ -866,6 +866,7 @@ impl EventType {
|
|||
EventType::Security(SecurityEvent::ScanBan) => 558,
|
||||
EventType::Store(StoreEvent::AzureError) => 559,
|
||||
EventType::TlsRpt(TlsRptEvent::RecordNotFound) => 560,
|
||||
EventType::Smtp(SmtpEvent::RcptToGreylisted) => 561,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1472,6 +1473,7 @@ impl EventType {
|
|||
558 => Some(EventType::Security(SecurityEvent::ScanBan)),
|
||||
559 => Some(EventType::Store(StoreEvent::AzureError)),
|
||||
560 => Some(EventType::TlsRpt(TlsRptEvent::RecordNotFound)),
|
||||
561 => Some(EventType::Smtp(SmtpEvent::RcptToGreylisted)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
|
||||
set "triplet" "g:${env.remote_ip}.${envelope.from}.${envelope.to}";
|
||||
|
||||
if eval "!key_exists(SPAM_DB, triplet)" {
|
||||
# Greylist sender for 30 days
|
||||
eval "key_set(SPAM_DB, triplet, '', 2592000)";
|
||||
reject "422 4.2.2 Greylisted, please try again in a few moments.";
|
||||
stop;
|
||||
}
|
|
@ -1,148 +0,0 @@
|
|||
|
||||
# Message only has text/html MIME parts
|
||||
if eval "header.content-type == 'text/html'" {
|
||||
let "t.MIME_HTML_ONLY" "1";
|
||||
}
|
||||
|
||||
foreverypart {
|
||||
if eval "eq_ignore_case(header.content-type, 'text/html')" {
|
||||
# Tokenize HTML
|
||||
let "is_body_part" "is_body()";
|
||||
let "html_tokens" "tokenize(part.text, 'html')";
|
||||
let "html_tokens_len" "len(html_tokens)";
|
||||
let "html_char_count" "0";
|
||||
let "html_space_count" "0";
|
||||
let "html_img_words" "0";
|
||||
let "html_words" "0";
|
||||
let "has_link_to_img" "0";
|
||||
let "has_uri" "0";
|
||||
let "has_text" "0";
|
||||
let "in_head" "0";
|
||||
let "in_body" "0";
|
||||
let "in_anchor" "0";
|
||||
let "in_anchor_href_ip" "0";
|
||||
let "in_anchor_href" "";
|
||||
|
||||
let "i" "0";
|
||||
while "i < html_tokens_len" {
|
||||
let "token" "html_tokens[i]";
|
||||
let "i" "i + 1";
|
||||
|
||||
# Tokens starting with '_' are text nodes
|
||||
if eval "starts_with(token, '_')" {
|
||||
if eval "in_head == 0" {
|
||||
let "html_char_count" "html_char_count + count_chars(token)";
|
||||
let "html_space_count" "html_space_count + count_spaces(token)";
|
||||
|
||||
let "text" "to_lowercase(trim(strip_prefix(token, '_')))";
|
||||
let "html_words" "html_words + len(tokenize(text, 'words'))";
|
||||
|
||||
let "uris" "tokenize(text, 'uri')";
|
||||
|
||||
if eval "!is_empty(uris)" {
|
||||
let "has_uri" "1";
|
||||
let "uri" "uris[0]";
|
||||
|
||||
if eval "in_anchor && !is_empty(in_anchor_href)" {
|
||||
if eval "contains(text, '://') &&
|
||||
uri_part(uri, 'scheme') != uri_part(in_anchor_href, 'scheme')" {
|
||||
# The anchor text contains a distinct scheme compared to the target URL
|
||||
let "t.HTTP_TO_HTTPS" "1";
|
||||
}
|
||||
if eval "(!in_anchor_href_ip && (domain_part(uri_part(uri, 'host'), 'sld') != domain_part(uri_part(in_anchor_href, 'host'), 'sld'))) ||
|
||||
(in_anchor_href_ip && (uri_part(uri, 'host') != uri_part(in_anchor_href, 'host')))" {
|
||||
let "t.PHISHING" "1";
|
||||
}
|
||||
}
|
||||
} elsif eval "!is_empty(text)" {
|
||||
let "has_text" "1";
|
||||
}
|
||||
}
|
||||
} elsif eval "starts_with(token, '<img')" {
|
||||
if eval "is_body_part" {
|
||||
let "dimensions" "html_attr_size(token, 'width', 800) + html_attr_size(token, 'height', 600)";
|
||||
|
||||
if eval "in_anchor && dimensions >= 210" {
|
||||
let "has_link_to_img" "1";
|
||||
}
|
||||
if eval "dimensions > 100" {
|
||||
# We assume that a single picture 100x200 contains approx 3 words of text
|
||||
let "html_img_words" "html_img_words + dimensions / 100";
|
||||
}
|
||||
|
||||
let "img_src" "html_attr(token, 'src')";
|
||||
if eval "starts_with(img_src, 'data:') && contains(img_src, ';base64,')" {
|
||||
# Has Data URI encoding
|
||||
let "t.HAS_DATA_URI" "1";
|
||||
}
|
||||
}
|
||||
} elsif eval "starts_with(token, '<head')" {
|
||||
let "in_head" "in_head + 1";
|
||||
} elsif eval "starts_with(token, '</head')" {
|
||||
let "in_head" "in_head - 1";
|
||||
} elsif eval "starts_with(token, '<body')" {
|
||||
let "in_body" "in_body + 1";
|
||||
} elsif eval "starts_with(token, '</body')" {
|
||||
let "in_body" "in_body - 1";
|
||||
} elsif eval "starts_with(token, '<a ')" {
|
||||
let "in_anchor" "1";
|
||||
let "in_anchor_href_ip" "0";
|
||||
let "in_anchor_href" "to_lowercase(trim(html_attr(token, 'href')))";
|
||||
|
||||
if eval "is_body_part && starts_with(in_anchor_href, 'data:') && contains(in_anchor_href, ';base64,')" {
|
||||
# Has Data URI encoding
|
||||
let "t.HAS_DATA_URI" "1";
|
||||
if eval "contains(in_anchor_href, 'text/')" {
|
||||
# Uses Data URI encoding to obfuscate plain or HTML in base64
|
||||
let "t.DATA_URI_OBFU" "1";
|
||||
}
|
||||
} elsif eval "is_ip_addr(uri_part(in_anchor_href, 'host'))" {
|
||||
# HTML anchor points to an IP address
|
||||
let "t.HTTP_TO_IP" "1";
|
||||
let "in_anchor_href_ip" "1";
|
||||
}
|
||||
} elsif eval "in_anchor && starts_with(token, '</a')" {
|
||||
let "in_anchor" "0";
|
||||
} elsif eval "starts_with(token, '<meta ')" {
|
||||
if eval "eq_ignore_case(html_attr(token, 'http-equiv'), 'refresh') &&
|
||||
contains_ignore_case(html_attr(token, 'content'), 'url=')" {
|
||||
# HTML meta refresh tag
|
||||
let "t.HTML_META_REFRESH_URL" "1";
|
||||
}
|
||||
} elsif eval "starts_with(token, '<link') && is_body_part &&
|
||||
(contains_ignore_case(html_attr(token, 'rel'), 'stylesheet') ||
|
||||
contains_ignore_case(html_attr(token, 'href'), '.css') )" {
|
||||
let "t.EXT_CSS" "1";
|
||||
}
|
||||
}
|
||||
|
||||
if eval "is_body_part" {
|
||||
# Check for unbalanced tags
|
||||
if eval "in_head != 0 || in_body != 0" {
|
||||
let "t.HTML_UNBALANCED_TAG" "1";
|
||||
}
|
||||
|
||||
# Check for short HTML parts with a link to an image
|
||||
if eval "has_link_to_img" {
|
||||
if eval "html_char_count < 1024" {
|
||||
let "t.HTML_SHORT_LINK_IMG_1" "1";
|
||||
} elsif eval "html_char_count < 1536" {
|
||||
let "t.HTML_SHORT_LINK_IMG_2" "1";
|
||||
} elsif eval "html_char_count < 2048" {
|
||||
let "t.HTML_SHORT_LINK_IMG_3" "1";
|
||||
}
|
||||
}
|
||||
|
||||
if eval "(!has_link_to_img || html_char_count >= 2048) &&
|
||||
(html_img_words / (html_words + html_img_words) > 0.5)" {
|
||||
# Message contains more images than text
|
||||
let "t.HTML_TEXT_IMG_RATIO" "1";
|
||||
}
|
||||
|
||||
if eval "has_uri && !has_text" {
|
||||
let "t.BODY_URI_ONLY" "1";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,232 +0,0 @@
|
|||
if eval "!header.mime-version.exists" {
|
||||
if eval "header.content-type.exists || header.content-transfer-encoding.exists" {
|
||||
let "t.MISSING_MIME_VERSION" "1";
|
||||
}
|
||||
} elsif eval "header.mime-version.raw_name != 'MIME-Version'" {
|
||||
let "t.MV_CASE" "1";
|
||||
}
|
||||
|
||||
let "has_text_part" "0";
|
||||
let "is_encrypted" "0";
|
||||
let "parts_num" "0";
|
||||
let "parts_max_len" "0";
|
||||
|
||||
if eval "header.Content-Type.exists && !header.Content-Disposition:Content-Transfer-Encoding:MIME-Version.exists && !eq_ignore_case(header.Content-Type, 'text/plain')" {
|
||||
# Only Content-Type header without other MIME headers
|
||||
let "t.MIME_HEADER_CTYPE_ONLY" "1";
|
||||
}
|
||||
|
||||
foreverypart {
|
||||
let "content_type" "to_lowercase(header.content-type)";
|
||||
let "type" "to_lowercase(header.content-type.type)";
|
||||
let "subtype" "to_lowercase(header.content-type.subtype)";
|
||||
let "cte" "header.content-transfer-encoding";
|
||||
let "part_is_attachment" "is_attachment()";
|
||||
|
||||
if eval "cte != '' && !is_lowercase(cte)" {
|
||||
let "cte" "to_lowercase(cte)";
|
||||
let "t.CTE_CASE" "1";
|
||||
}
|
||||
|
||||
if eval "ends_with(header.content-type.raw, ';')" {
|
||||
# Content-Type header ends with a semi-colon
|
||||
let "t.CT_EXTRA_SEMI" "1";
|
||||
}
|
||||
|
||||
if eval "type == 'multipart'" {
|
||||
if eval "subtype == 'alternative'" {
|
||||
let "has_plain_part" "0";
|
||||
let "has_html_part" "0";
|
||||
|
||||
let "text_part_words" "";
|
||||
let "text_part_uris" "0";
|
||||
|
||||
let "html_part_words" "";
|
||||
let "html_part_uris" "0";
|
||||
|
||||
foreverypart {
|
||||
let "ma_ct" "to_lowercase(header.content-type)";
|
||||
|
||||
if eval "!has_plain_part && ma_ct == 'text/plain'" {
|
||||
let "text_part" "part.text";
|
||||
let "text_part_words" "tokenize(text_part, 'words')";
|
||||
let "text_part_uris" "count(dedup(uri_part(tokenize(text_part, 'uri_strict'), 'host')))";
|
||||
let "has_plain_part" "1";
|
||||
} elsif eval "!has_html_part && ma_ct == 'text/html'" {
|
||||
let "html_part" "html_to_text(part.text)";
|
||||
let "html_part_words" "tokenize(html_part, 'words')";
|
||||
let "html_part_uris" "count(dedup(uri_part(tokenize(part.text, 'uri_strict'), 'host')))";
|
||||
let "has_html_part" "1";
|
||||
}
|
||||
}
|
||||
|
||||
# Multipart message mostly text/html MIME
|
||||
if eval "has_html_part" {
|
||||
if eval "!has_plain_part" {
|
||||
let "t.MIME_MA_MISSING_TEXT" "1";
|
||||
}
|
||||
} elsif eval "has_plain_part" {
|
||||
let "t.MIME_MA_MISSING_HTML" "1";
|
||||
}
|
||||
|
||||
# HTML and text parts are different
|
||||
if eval "!t.R_PARTS_DIFFER && has_html_part && has_plain_part &&
|
||||
(!is_empty(text_part_words) || !is_empty(html_part_words)) &&
|
||||
cosine_similarity(text_part_words, html_part_words) < 0.95" {
|
||||
let "t.R_PARTS_DIFFER" "1";
|
||||
}
|
||||
|
||||
# Odd URI count between parts
|
||||
if eval "text_part_uris != html_part_uris" {
|
||||
set "t.URI_COUNT_ODD" "1";
|
||||
}
|
||||
} elsif eval "subtype == 'mixed'" {
|
||||
let "num_text_parts" "0";
|
||||
let "has_other_part" "0";
|
||||
|
||||
foreverypart {
|
||||
if eval "eq_ignore_case(header.content-type.type, 'text') && !is_attachment()" {
|
||||
let "num_text_parts" "num_text_parts + 1";
|
||||
} elsif eval "!eq_ignore_case(header.content-type.type, 'multipart')" {
|
||||
let "has_other_part" "1";
|
||||
}
|
||||
}
|
||||
|
||||
# Found multipart/mixed without non-textual part
|
||||
if eval "!has_other_part && num_text_parts < 3" {
|
||||
let "t.CTYPE_MIXED_BOGUS" "1";
|
||||
}
|
||||
} elsif eval "subtype == 'encrypted'" {
|
||||
set "is_encrypted" "1";
|
||||
}
|
||||
} else {
|
||||
if eval "type == 'text'" {
|
||||
# MIME text part claims to be ASCII but isn't
|
||||
if eval "cte == '' || cte == '7bit'" {
|
||||
if eval "!is_ascii(part.raw)" {
|
||||
let "t.R_BAD_CTE_7BIT" "1";
|
||||
}
|
||||
} else {
|
||||
if eval "cte == 'base64'" {
|
||||
if eval "is_ascii(part.text)" {
|
||||
# Has text part encoded in base64 that does not contain any 8bit characters
|
||||
let "t.MIME_BASE64_TEXT_BOGUS" "1";
|
||||
} else {
|
||||
# Has text part encoded in base64
|
||||
let "t.MIME_BASE64_TEXT" "1";
|
||||
}
|
||||
}
|
||||
|
||||
if eval "subtype == 'plain' && is_empty(header.content-type.attr.charset)" {
|
||||
# Charset header is missing
|
||||
let "t.R_MISSING_CHARSET" "1";
|
||||
}
|
||||
}
|
||||
let "has_text_part" "1";
|
||||
} elsif eval "type == 'application'" {
|
||||
if eval "subtype == 'pkcs7-mime'" {
|
||||
let "t.ENCRYPTED_SMIME" "1";
|
||||
let "part_is_attachment" "0";
|
||||
} elsif eval "subtype == 'pkcs7-signature'" {
|
||||
let "t.SIGNED_SMIME" "1";
|
||||
let "part_is_attachment" "0";
|
||||
} elsif eval "subtype == 'pgp-encrypted'" {
|
||||
let "t.ENCRYPTED_PGP" "1";
|
||||
let "part_is_attachment" "0";
|
||||
} elsif eval "subtype == 'pgp-signature'" {
|
||||
let "t.SIGNED_PGP" "1";
|
||||
let "part_is_attachment" "0";
|
||||
} elsif eval "subtype == 'octet-stream'" {
|
||||
if eval "!is_encrypted &&
|
||||
!header.content-id.exists &&
|
||||
(!header.content-disposition.exists ||
|
||||
(!eq_ignore_case(header.content-disposition.type, 'attachment') &&
|
||||
is_empty(header.content-disposition.attr.filename)))" {
|
||||
let "t.CTYPE_MISSING_DISPOSITION" "1";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Increase part count
|
||||
let "parts_num" "parts_num + 1";
|
||||
if eval "parts_num == 1" {
|
||||
let "parts_len" "mime_part_len()";
|
||||
if eval "parts_len > parts_max_len" {
|
||||
let "parts_max_len" "parts_len";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if eval "is_empty(type) && header.content-type.exists" {
|
||||
let "t.BROKEN_CONTENT_TYPE" "1";
|
||||
}
|
||||
|
||||
if eval "part_is_attachment" {
|
||||
# Has a MIME attachment
|
||||
let "t.HAS_ATTACHMENT" "1";
|
||||
|
||||
# Detect and compare mime type
|
||||
let "detected_mime_type" "detect_file_type('mime')";
|
||||
if eval "!is_empty(detected_mime_type)" {
|
||||
if eval "detected_mime_type == content_type" {
|
||||
# Known content-type
|
||||
let "t.MIME_GOOD" "1";
|
||||
} elsif eval "content_type != 'application/octet-stream'" {
|
||||
# Known bad content-type
|
||||
let "t.MIME_BAD" "1";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Analyze attachment name
|
||||
let "attach_name" "attachment_name()";
|
||||
if eval "!is_empty(attach_name)" {
|
||||
if eval "has_obscured(attach_name)" {
|
||||
let "t.MIME_BAD_UNICODE" "1";
|
||||
}
|
||||
let "name_parts" "rsplit(to_lowercase(attach_name), '.')";
|
||||
if eval "count(name_parts) > 1" {
|
||||
let "ext_type" "key_get('spam-mime', name_parts[0])";
|
||||
if eval "!is_empty(ext_type)" {
|
||||
let "ext_type_double" "key_get('spam-mime', name_parts[1])";
|
||||
if eval "contains(ext_type, 'BAD')" {
|
||||
# Bad extension
|
||||
if eval "contains(ext_type_double, 'BAD')" {
|
||||
let "t.MIME_DOUBLE_BAD_EXTENSION" "1";
|
||||
} else {
|
||||
let "t.MIME_BAD_EXTENSION" "1";
|
||||
}
|
||||
}
|
||||
if eval "contains(ext_type, 'AR') && contains(ext_type_double, 'AR')" {
|
||||
# Archive in archive
|
||||
let "t.MIME_ARCHIVE_IN_ARCHIVE" "1";
|
||||
}
|
||||
|
||||
if eval "contains(ext_type, '/') &&
|
||||
content_type != 'application/octet-stream' &&
|
||||
!contains(split(ext_type, '|'), content_type)" {
|
||||
# Invalid attachment mime type
|
||||
let "t.MIME_BAD_ATTACHMENT" "1";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Message contains both text and encrypted parts
|
||||
if eval "has_text_part && (t.ENCRYPTED_SMIME || t.ENCRYPTED_PGP)" {
|
||||
let "t.BOGUS_ENCRYPTED_AND_TEXT" "1";
|
||||
}
|
||||
|
||||
# Message contains only one short part
|
||||
if eval "parts_num == 1 && parts_max_len < 64" {
|
||||
let "t.SINGLE_SHORT_PART" "1";
|
||||
} elsif eval "parts_max_len == 0" {
|
||||
let "t.COMPLETELY_EMPTY" "1";
|
||||
}
|
||||
|
||||
# Check for mixed script in body
|
||||
if eval "!is_single_script(text_body)" {
|
||||
let "t.R_MIXED_CHARSET" "1";
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
# Check message hash against Pyzor on public.pyzor.org:24441 using a 5 second timeout
|
||||
let "pyzor_response" "pyzor_check('public.pyzor.org:24441', 5)";
|
||||
|
||||
if eval "!is_empty(pyzor_response) && pyzor_response[0] == 200" {
|
||||
let "count" "pyzor_response[1]";
|
||||
let "wl_count" "pyzor_response[2]";
|
||||
|
||||
if eval "count > 5 && (wl_count < 10 || wl_count / count < 0.2)" {
|
||||
let "t.PYZOR" "1";
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
|
||||
|
||||
# Obtain thread name and subject
|
||||
let "contents" "thread_name(header.subject) + ' ' + body.to_text";
|
||||
|
||||
if eval "env.train == 'spam'" {
|
||||
eval "bayes_train(SPAM_DB, contents, true)";
|
||||
} elsif eval "env.train == 'ham'" {
|
||||
eval "bayes_train(SPAM_DB, contents, false)";
|
||||
} else {
|
||||
reject "Missing variable 'train'";
|
||||
}
|
Loading…
Add table
Reference in a new issue