Updated IMAP autolearn behaviour to deal with Apple Mail's shenanigans
Some checks are pending
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run

This commit is contained in:
mdecimus 2025-12-14 16:17:39 +01:00
parent e1c1e9a6d4
commit 4675b18491
5 changed files with 31 additions and 18 deletions

View file

@ -451,21 +451,24 @@ impl PyzorConfig {
impl ClassifierConfig {
pub fn parse(config: &mut Config) -> Option<Self> {
if !config
.property_or_default("spam-filter.classifier.enable", "true")
.unwrap_or(true)
{
let ccfh = match config.value("spam-filter.classifier.model") {
Some("ftrl-fh") | None => false,
Some("ftrl-ccfh") => true,
Some("disabled") => return None,
Some(other) => {
config.new_build_error(
"spam-filter.classifier.model",
format!("Invalid model type: {}", other),
);
return None;
}
};
let w_params = FtrlParameters::parse(config, "spam-filter.classifier.parameters", 20);
let i_params = if config
.property_or_default("spam-filter.classifier.ccfh.enable", "false")
.unwrap_or(false)
{
let i_params = if ccfh {
Some(FtrlParameters::parse(
config,
"spam-filter.classifier.ccfh.parameters",
"spam-filter.classifier.parameters.ccfh",
w_params.feature_hash_size - 2,
))
} else {

View file

@ -7,7 +7,7 @@
use super::crypto::{EncryptMessage, EncryptMessageError};
use crate::{
cache::{MessageCacheFetch, email::MessageCacheAccess, mailbox::MailboxCacheAccess},
mailbox::{INBOX_ID, JUNK_ID, SENT_ID, UidMailbox},
mailbox::{INBOX_ID, JUNK_ID, SENT_ID, TRASH_ID, UidMailbox},
message::{
crypto::EncryptionParams,
index::{IndexMessage, extractors::VisitText},
@ -446,7 +446,9 @@ impl EmailIngest for Server {
if params.keywords.contains(&Keyword::Junk) {
train_spam = Some(true);
} else if params.keywords.contains(&Keyword::NotJunk) {
if !params.mailbox_ids.contains(&TRASH_ID) {
train_spam = Some(false);
}
} else if params.mailbox_ids[0] == JUNK_ID {
train_spam = Some(true);
} else if params.mailbox_ids[0] == INBOX_ID {

View file

@ -12,7 +12,10 @@ use crate::{
use ahash::AHashSet;
use common::{listener::SessionStream, storage::index::ObjectIndexBuilder};
use directory::Permission;
use email::message::{ingest::EmailIngest, metadata::MessageData};
use email::{
mailbox::TRASH_ID,
message::{ingest::EmailIngest, metadata::MessageData},
};
use imap_proto::{
Command, ResponseCode, ResponseType, StatusResponse,
protocol::{
@ -250,7 +253,8 @@ impl<T: SessionStream> SessionData<T> {
if keyword == &Keyword::Junk {
train_spam = Some(true);
break;
} else if keyword == &Keyword::NotJunk {
} else if keyword == &Keyword::NotJunk && !data.inner.has_mailbox_id(TRASH_ID) {
// Only train as ham if the message is not in Trash: Apple Mail adds NotJunk to trashed items, which may well be spam and should not be learned as ham
train_spam = Some(false);
break;
}
@ -258,7 +262,9 @@ impl<T: SessionStream> SessionData<T> {
if train_spam.is_none() {
for keyword in new_data.removed_keywords(data.inner) {
if keyword == &Keyword::Junk {
if !data.inner.has_mailbox_id(TRASH_ID) {
train_spam = Some(false);
}
break;
}
}

View file

@ -10,7 +10,6 @@ use crate::{
CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField,
TracingSearchField,
},
write::SearchIndex,
};
use reqwest::{Error, Response, Url};
use serde_json::{Value, json};
@ -118,6 +117,8 @@ impl ElasticSearchStore {
#[cfg(feature = "test_mode")]
pub async fn drop_indexes(&self) -> trc::Result<()> {
use crate::write::SearchIndex;
for index in &[
SearchIndex::Email,
SearchIndex::Calendar,

View file

@ -10,7 +10,6 @@ use crate::{
CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField,
TracingSearchField,
},
write::SearchIndex,
};
use reqwest::{Error, Response, Url};
use serde_json::{Value, json};
@ -148,6 +147,8 @@ impl MeiliSearchStore {
#[cfg(feature = "test_mode")]
pub async fn drop_indexes(&self) -> trc::Result<()> {
use crate::write::SearchIndex;
for index in &[
SearchIndex::Email,
SearchIndex::Calendar,