Updated IMAP autolearn behaviour to deal with Apple Mail's shenanigans
Some checks are pending
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run

This commit is contained in:
mdecimus 2025-12-14 16:17:39 +01:00
parent e1c1e9a6d4
commit 4675b18491
5 changed files with 31 additions and 18 deletions

View file

@ -451,21 +451,24 @@ impl PyzorConfig {
impl ClassifierConfig { impl ClassifierConfig {
pub fn parse(config: &mut Config) -> Option<Self> { pub fn parse(config: &mut Config) -> Option<Self> {
if !config let ccfh = match config.value("spam-filter.classifier.model") {
.property_or_default("spam-filter.classifier.enable", "true") Some("ftrl-fh") | None => false,
.unwrap_or(true) Some("ftrl-ccfh") => true,
{ Some("disabled") => return None,
Some(other) => {
config.new_build_error(
"spam-filter.classifier.model",
format!("Invalid model type: {}", other),
);
return None; return None;
} }
};
let w_params = FtrlParameters::parse(config, "spam-filter.classifier.parameters", 20); let w_params = FtrlParameters::parse(config, "spam-filter.classifier.parameters", 20);
let i_params = if config let i_params = if ccfh {
.property_or_default("spam-filter.classifier.ccfh.enable", "false")
.unwrap_or(false)
{
Some(FtrlParameters::parse( Some(FtrlParameters::parse(
config, config,
"spam-filter.classifier.ccfh.parameters", "spam-filter.classifier.parameters.ccfh",
w_params.feature_hash_size - 2, w_params.feature_hash_size - 2,
)) ))
} else { } else {

View file

@ -7,7 +7,7 @@
use super::crypto::{EncryptMessage, EncryptMessageError}; use super::crypto::{EncryptMessage, EncryptMessageError};
use crate::{ use crate::{
cache::{MessageCacheFetch, email::MessageCacheAccess, mailbox::MailboxCacheAccess}, cache::{MessageCacheFetch, email::MessageCacheAccess, mailbox::MailboxCacheAccess},
mailbox::{INBOX_ID, JUNK_ID, SENT_ID, UidMailbox}, mailbox::{INBOX_ID, JUNK_ID, SENT_ID, TRASH_ID, UidMailbox},
message::{ message::{
crypto::EncryptionParams, crypto::EncryptionParams,
index::{IndexMessage, extractors::VisitText}, index::{IndexMessage, extractors::VisitText},
@ -446,7 +446,9 @@ impl EmailIngest for Server {
if params.keywords.contains(&Keyword::Junk) { if params.keywords.contains(&Keyword::Junk) {
train_spam = Some(true); train_spam = Some(true);
} else if params.keywords.contains(&Keyword::NotJunk) { } else if params.keywords.contains(&Keyword::NotJunk) {
if !params.mailbox_ids.contains(&TRASH_ID) {
train_spam = Some(false); train_spam = Some(false);
}
} else if params.mailbox_ids[0] == JUNK_ID { } else if params.mailbox_ids[0] == JUNK_ID {
train_spam = Some(true); train_spam = Some(true);
} else if params.mailbox_ids[0] == INBOX_ID { } else if params.mailbox_ids[0] == INBOX_ID {

View file

@ -12,7 +12,10 @@ use crate::{
use ahash::AHashSet; use ahash::AHashSet;
use common::{listener::SessionStream, storage::index::ObjectIndexBuilder}; use common::{listener::SessionStream, storage::index::ObjectIndexBuilder};
use directory::Permission; use directory::Permission;
use email::message::{ingest::EmailIngest, metadata::MessageData}; use email::{
mailbox::TRASH_ID,
message::{ingest::EmailIngest, metadata::MessageData},
};
use imap_proto::{ use imap_proto::{
Command, ResponseCode, ResponseType, StatusResponse, Command, ResponseCode, ResponseType, StatusResponse,
protocol::{ protocol::{
@ -250,7 +253,8 @@ impl<T: SessionStream> SessionData<T> {
if keyword == &Keyword::Junk { if keyword == &Keyword::Junk {
train_spam = Some(true); train_spam = Some(true);
break; break;
} else if keyword == &Keyword::NotJunk { } else if keyword == &Keyword::NotJunk && !data.inner.has_mailbox_id(TRASH_ID) {
// Only train as ham if the message is not in Trash (Apple Mail tends to add NotJunk to trashed items, which are likely to be spam)
train_spam = Some(false); train_spam = Some(false);
break; break;
} }
@ -258,7 +262,9 @@ impl<T: SessionStream> SessionData<T> {
if train_spam.is_none() { if train_spam.is_none() {
for keyword in new_data.removed_keywords(data.inner) { for keyword in new_data.removed_keywords(data.inner) {
if keyword == &Keyword::Junk { if keyword == &Keyword::Junk {
if !data.inner.has_mailbox_id(TRASH_ID) {
train_spam = Some(false); train_spam = Some(false);
}
break; break;
} }
} }

View file

@ -10,7 +10,6 @@ use crate::{
CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField, CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField,
TracingSearchField, TracingSearchField,
}, },
write::SearchIndex,
}; };
use reqwest::{Error, Response, Url}; use reqwest::{Error, Response, Url};
use serde_json::{Value, json}; use serde_json::{Value, json};
@ -118,6 +117,8 @@ impl ElasticSearchStore {
#[cfg(feature = "test_mode")] #[cfg(feature = "test_mode")]
pub async fn drop_indexes(&self) -> trc::Result<()> { pub async fn drop_indexes(&self) -> trc::Result<()> {
use crate::write::SearchIndex;
for index in &[ for index in &[
SearchIndex::Email, SearchIndex::Email,
SearchIndex::Calendar, SearchIndex::Calendar,

View file

@ -10,7 +10,6 @@ use crate::{
CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField, CalendarSearchField, ContactSearchField, EmailSearchField, SearchableField,
TracingSearchField, TracingSearchField,
}, },
write::SearchIndex,
}; };
use reqwest::{Error, Response, Url}; use reqwest::{Error, Response, Url};
use serde_json::{Value, json}; use serde_json::{Value, json};
@ -148,6 +147,8 @@ impl MeiliSearchStore {
#[cfg(feature = "test_mode")] #[cfg(feature = "test_mode")]
pub async fn drop_indexes(&self) -> trc::Result<()> { pub async fn drop_indexes(&self) -> trc::Result<()> {
use crate::write::SearchIndex;
for index in &[ for index in &[
SearchIndex::Email, SearchIndex::Email,
SearchIndex::Calendar, SearchIndex::Calendar,