mail-server/crates/email/src/sieve/ingest.rs
2025-05-16 16:20:05 +02:00

722 lines
29 KiB
Rust

/*
* SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use super::{ActiveScript, SeenIdHash, SieveScript};
use crate::{
cache::{MessageCacheFetch, mailbox::MailboxCacheAccess},
mailbox::{INBOX_ID, TRASH_ID, manage::MailboxFnc},
message::{
delivery::AutogeneratedMessage,
ingest::{EmailIngest, IngestEmail, IngestSource, IngestedEmail},
},
};
use common::{
Server, auth::AccessToken, config::jmap::settings::SpecialUse, scripts::plugins::PluginContext,
};
use directory::{Permission, QueryBy};
use jmap_proto::types::{collection::Collection, id::Id, keyword::Keyword, property::Property};
use mail_parser::MessageParser;
use sieve::{Envelope, Event, Input, Mailbox, Recipient, Sieve};
use std::future::Future;
use std::{borrow::Cow, sync::Arc};
use store::{
Deserialize, Serialize, SerializeInfallible,
ahash::AHashMap,
dispatch::lookup::KeyValue,
query::Filter,
write::{
AlignedBytes, Archiver, BatchBuilder, BlobOp, UnversionedArchive, UnversionedArchiver,
},
};
use trc::{AddContext, SieveEvent};
use utils::config::utils::ParseValue;
/// A message tracked while a Sieve script runs, together with the delivery
/// decisions the script has made for it so far.
struct SieveMessage<'x> {
/// Raw bytes of the message.
pub raw_message: Cow<'x, [u8]>,
/// Ids of the mailboxes the message should be filed into. A message whose
/// list is still empty after the script finishes is not delivered.
pub file_into: Vec<u32>,
/// Keywords to apply on delivery; overwritten by each `keep`/`fileinto`
/// action that targets this message.
pub flags: Vec<Keyword>,
}
/// Sieve-related operations needed during message ingestion: running a script
/// over an incoming message and loading compiled scripts from storage.
pub trait SieveScriptIngest: Sync + Send {
/// Runs `active_script` over `raw_message` and delivers the outcome.
///
/// `envelope_from` and `envelope_to` are exposed to the script as the
/// envelope addresses, `session_id` is attached to trace events, and any
/// message the script asks to send is appended to `autogenerated`.
/// Returns the details of an ingested message, or an error when the
/// message is rejected or could not be delivered.
#[allow(clippy::too_many_arguments)]
fn sieve_script_ingest(
&self,
access_token: &AccessToken,
raw_message: &[u8],
envelope_from: &str,
envelope_to: &str,
session_id: u64,
active_script: ActiveScript,
autogenerated: &mut Vec<AutogeneratedMessage>,
) -> impl Future<Output = trc::Result<IngestedEmail>> + Send;
/// Returns the compiled active script of `account_id`, or `None` when the
/// account has no active script.
fn sieve_script_get_active(
&self,
account_id: u32,
) -> impl Future<Output = trc::Result<Option<ActiveScript>>> + Send;
/// Returns the compiled script called `name` that belongs to
/// `account_id`, or `None` when no such script exists.
fn sieve_script_get_by_name(
&self,
account_id: u32,
name: &str,
) -> impl Future<Output = trc::Result<Option<Sieve>>> + Send;
/// Loads the compiled form of the script stored as `document_id` in
/// `account_id`, along with its name and hash.
fn sieve_script_compile(
&self,
account_id: u32,
document_id: u32,
) -> impl Future<Output = trc::Result<CompiledScript>> + Send;
}
impl SieveScriptIngest for Server {
/// Runs `active_script` against an incoming message and delivers the result.
///
/// The raw message is parsed and handed to the untrusted Sieve runtime
/// together with the envelope addresses. Every event emitted by the runtime
/// is answered in the loop below: `keep`/`fileinto` record target mailboxes
/// and flags, messages the script wants to send are appended to
/// `autogenerated`, and so on. Once the script finishes, each tracked message
/// that has at least one target mailbox is stored through `email_ingest`.
///
/// # Errors
///
/// * Code 550 when `raw_message` cannot be parsed.
/// * Code 571, carrying the script-supplied reason, when the script rejected
///   the message.
/// * Errors from cache, directory, in-memory store or mailbox creation calls
///   are propagated.
/// * The last delivery error when no message could be delivered at all.
#[allow(clippy::blocks_in_conditions)]
async fn sieve_script_ingest(
&self,
access_token: &AccessToken,
raw_message: &[u8],
envelope_from: &str,
envelope_to: &str,
session_id: u64,
active_script: ActiveScript,
autogenerated: &mut Vec<AutogeneratedMessage>,
) -> trc::Result<IngestedEmail> {
// Parse message
let message = if let Some(message) = MessageParser::new().parse(raw_message) {
message
} else {
return Err(
trc::EventType::MessageIngest(trc::MessageIngestEvent::Error)
.ctx(trc::Key::Code, 550)
.ctx(trc::Key::Reason, "Failed to parse e-mail message."),
);
};
// Load the account's cache; it answers the mailbox lookups (by id, path
// and role) performed while handling script events.
let account_id = access_token.primary_id;
let mut cache = self
.get_cached_messages(account_id)
.await
.caused_by(trc::location!())?;
// Create Sieve instance
let mut instance = self.core.sieve.untrusted_runtime.filter_parsed(message);
// Set account name and email
let mail_from = self
.core
.storage
.directory
.query(QueryBy::Id(account_id), false)
.await
.caused_by(trc::location!())?
.and_then(|p| {
instance.set_user_full_name(p.description().unwrap_or_else(|| p.name()));
p.emails.into_iter().next()
});
// Set account address, falling back to the envelope recipient when the
// directory entry is missing or has no e-mail address.
let mail_from = mail_from.unwrap_or_else(|| envelope_to.into());
instance.set_user_address(&mail_from);
// Set envelope
instance.set_envelope(Envelope::From, envelope_from);
instance.set_envelope(Envelope::To, envelope_to);
let mut input = Input::script(
active_script.script_name.to_string(),
active_script.script.clone(),
);
// Outcome flags collected while the script runs.
let mut do_discard = false;
let mut do_deliver = false;
let mut reject_reason = None;
// Index 0 is the incoming message; messages created by the script are
// appended later. The `message_id` carried by events indexes this list.
let mut messages: Vec<SieveMessage> = vec![SieveMessage {
raw_message: raw_message.into(),
file_into: Vec::new(),
flags: Vec::new(),
}];
// Placeholder result, returned unchanged when nothing ends up delivered.
let mut ingested_message = IngestedEmail {
id: Id::default(),
change_id: u64::MAX,
blob_id: Default::default(),
size: raw_message.len(),
imap_uids: Vec::new(),
};
// Remembers the answer given for each duplicate id during this run, so the
// in-memory store is consulted at most once per id.
let mut checked_ids: AHashMap<SeenIdHash, bool> = AHashMap::new();
// Event loop: every event is answered by assigning `input`, which is fed
// back to the runtime on the next `run` call.
while let Some(event) = instance.run(input) {
match event {
Ok(event) => match event {
// Included scripts: personal ones are loaded from the account, global
// ones from the server's untrusted scripts (looked up by lowercased
// name). Any lookup failure, including an error, is answered with `false`.
Event::IncludeScript { name, .. } => match &name {
sieve::Script::Personal(name_) => {
if let Ok(Some(script)) =
self.sieve_script_get_by_name(account_id, name_).await
{
input = Input::script(name, script);
} else {
input = false.into();
}
}
sieve::Script::Global(name_) => {
if let Some(script) =
self.get_untrusted_sieve_script(&name_.to_lowercase(), session_id)
{
input = Input::script(name, script.clone());
} else {
input = false.into();
}
}
},
Event::MailboxExists {
mailboxes,
special_use,
} => {
if !mailboxes.is_empty() {
// Resolve the requested roles to mailbox ids. A role that cannot be
// resolved is recorded as `u32::MAX`.
let mut special_use_ids = Vec::with_capacity(special_use.len());
for role in special_use {
special_use_ids.push(if role.eq_ignore_ascii_case("inbox") {
INBOX_ID
} else if role.eq_ignore_ascii_case("trash") {
TRASH_ID
} else {
let mut mailbox_id = u32::MAX;
if let Ok(role) = SpecialUse::parse_value(&role) {
if let Some(m) = cache.mailbox_by_role(&role) {
mailbox_id = m.document_id;
}
}
mailbox_id
});
}
// Every listed mailbox must exist and, when roles were given, its id
// must be one of the resolved role ids.
let mut result = true;
for mailbox in mailboxes {
match mailbox {
Mailbox::Name(name) => {
if !matches!(
cache.mailbox_by_path(&name),
Some(item) if special_use_ids.is_empty() ||
special_use_ids.contains(&item.document_id)
) {
result = false;
break;
}
}
Mailbox::Id(id) => {
if !matches!(Id::from_bytes(id.as_bytes()), Some(id) if
cache.has_mailbox_id(&id.document_id()) &&
(special_use_ids.is_empty() ||
special_use_ids.contains(&id.document_id())))
{
result = false;
break;
}
}
}
}
input = result.into();
} else if !special_use.is_empty() {
// Only roles were given: "inbox" and "trash" are accepted without a
// lookup, any other role must parse and map to a mailbox.
let mut result = true;
for role in special_use {
if !role.eq_ignore_ascii_case("inbox")
&& !role.eq_ignore_ascii_case("trash")
{
let role = SpecialUse::parse_value(&role);
if role.is_err()
|| cache.mailbox_by_role(&role.unwrap()).is_none()
{
result = false;
break;
}
}
}
input = result.into();
} else {
input = false.into();
}
}
Event::DuplicateId { id, expiry, last } => {
let id_hash = SeenIdHash::new(account_id, active_script.hash, &id);
if let Some(result) = checked_ids.get(&id_hash) {
input = (*result).into();
} else {
let exists = self
.in_memory_store()
.key_get::<()>(id_hash.key())
.await
.caused_by(trc::location!())?
.is_some();
// Record the id when it was not seen before; with `last` set the key
// is written again even when it already exists.
if !exists || last {
self.in_memory_store()
.key_set(KeyValue::new(id_hash.key(), vec![]).expires(expiry))
.await
.caused_by(trc::location!())?;
}
checked_ids.insert(id_hash, exists);
input = exists.into();
}
}
Event::Discard => {
do_discard = true;
input = true.into();
}
Event::Reject { reason, .. } => {
// A reject also counts as a discard, so the Inbox fail-safe after the
// loop is not triggered.
reject_reason = reason.into();
do_discard = true;
input = true.into();
}
Event::Keep { flags, message_id } => {
if let Some(message) = messages.get_mut(message_id) {
message.flags = flags.into_iter().map(Keyword::from).collect();
if !message.file_into.contains(&INBOX_ID) {
message.file_into.push(INBOX_ID);
}
do_deliver = true;
} else {
trc::event!(
Sieve(SieveEvent::UnexpectedError),
Details = "Unknown message id.",
MessageId = message_id,
SpanId = session_id
);
}
input = true.into();
}
Event::FileInto {
folder,
flags,
mailbox_id,
special_use,
create,
message_id,
} => {
// `u32::MAX` means "target not resolved yet"; resolution is attempted
// by id, then by role, then by name, and finally defaults to the Inbox.
let mut target_id = u32::MAX;
// Find mailbox by Id
if let Some(mailbox_id) =
mailbox_id.and_then(|m| Id::from_bytes(m.as_bytes()))
{
let mailbox_id = mailbox_id.document_id();
if cache.has_mailbox_id(&mailbox_id) {
target_id = mailbox_id;
}
}
// Find mailbox by role
if let Some(special_use) = special_use {
if target_id == u32::MAX {
if special_use.eq_ignore_ascii_case("inbox") {
target_id = INBOX_ID;
} else if special_use.eq_ignore_ascii_case("trash") {
target_id = TRASH_ID;
} else if let Ok(role) = SpecialUse::parse_value(&special_use) {
if let Some(item) = cache.mailbox_by_role(&role) {
target_id = item.document_id;
}
}
}
}
// Find mailbox by name
if target_id == u32::MAX {
if !create {
if let Some(m) = cache.mailbox_by_path(&folder) {
target_id = m.document_id;
}
} else if let Some(document_id) = self
.mailbox_create_path(account_id, &folder)
.await
.caused_by(trc::location!())?
{
// Reload the cache after `mailbox_create_path` returned a mailbox id.
cache = self
.get_cached_messages(account_id)
.await
.caused_by(trc::location!())?;
target_id = document_id;
}
}
// Default to Inbox
if target_id == u32::MAX {
target_id = INBOX_ID;
}
if let Some(message) = messages.get_mut(message_id) {
message.flags = flags.into_iter().map(Keyword::from).collect();
if !message.file_into.contains(&target_id) {
message.file_into.push(target_id);
}
do_deliver = true;
} else {
trc::event!(
Sieve(SieveEvent::UnexpectedError),
Details = "Unknown message id.",
MessageId = message_id,
SpanId = session_id
);
}
input = true.into();
}
Event::SendMessage {
recipient,
message_id,
..
} => {
// The answer is set up front so the `continue` statements below can
// skip straight to the next event.
input = true.into();
if let Some(message) = messages.get(message_id) {
let recipients: Vec<String> = match recipient {
Recipient::Address(rcpt) => vec![rcpt],
Recipient::Group(rcpts) => rcpts,
Recipient::List(_) => {
// Not yet implemented
continue;
}
};
// Messages above the configured maximum size are logged and not queued.
if message.raw_message.len() <= self.core.jmap.mail_max_size {
trc::event!(
Sieve(SieveEvent::SendMessage),
From = mail_from.clone(),
To = recipients
.iter()
.map(|r| trc::Value::String(r.as_str().into()))
.collect::<Vec<_>>(),
Size = message.raw_message.len(),
SpanId = session_id
);
autogenerated.push(AutogeneratedMessage {
sender_address: mail_from.clone(),
recipients,
message: message.raw_message.to_vec(),
});
} else {
trc::event!(
Sieve(SieveEvent::MessageTooLarge),
From = mail_from.clone(),
To = recipients
.iter()
.map(|r| trc::Value::String(r.as_str().into()))
.collect::<Vec<_>>(),
Size = message.raw_message.len(),
Limit = self.core.jmap.mail_max_size,
SpanId = session_id,
);
}
} else {
trc::event!(
Sieve(SieveEvent::UnexpectedError),
Details = "Unknown message id.",
MessageId = message_id,
SpanId = session_id
);
continue;
}
}
Event::ListContains { .. }
| Event::Notify { .. }
| Event::SetEnvelope { .. } => {
// Not allowed
input = false.into();
}
Event::Function { id, arguments } => {
// The plugin's result becomes the answer; the `modifications` buffer is
// a temporary that is dropped after the call.
input = self
.core
.run_plugin(
id,
PluginContext {
session_id,
server: self,
message: instance.message(),
modifications: &mut Vec::new(),
access_token: access_token.into(),
arguments,
},
)
.await;
}
Event::CreatedMessage { message, .. } => {
// Track the new message; it is only delivered if a later action files it.
messages.push(SieveMessage {
raw_message: message.into(),
file_into: Vec::new(),
flags: Vec::new(),
});
input = true.into();
}
},
// In test builds a script error message aborts the run with a panic.
#[cfg(feature = "test_mode")]
Err(sieve::runtime::RuntimeError::ScriptErrorMessage(err)) => {
panic!("Sieve test failed: {}", err);
}
// Other runtime errors are logged and the run continues.
Err(err) => {
trc::event!(
Sieve(SieveEvent::RuntimeError),
Reason = err.to_string(),
SpanId = session_id
);
input = true.into();
}
}
}
// Fail-safe, no discard and no keep seen, assume that something went wrong and file anyway.
if !do_deliver && !do_discard {
messages[0].file_into.push(INBOX_ID);
}
// Deliver messages
let mut last_temp_error = None;
let mut has_delivered = false;
let can_spam_train = self.email_bayes_can_train(access_token);
for (message_id, sieve_message) in messages.into_iter().enumerate() {
if !sieve_message.file_into.is_empty() {
// Parse message if needed: the already parsed incoming message is reused
// when the script left it unchanged, anything else is parsed again.
let message = if message_id == 0 && !instance.has_message_changed() {
instance.take_message()
} else if let Some(message) =
MessageParser::new().parse(sieve_message.raw_message.as_ref())
{
message
} else {
trc::event!(
Sieve(SieveEvent::UnexpectedError),
Details = "Failed to parse Sieve generated message.",
SpanId = session_id
);
continue;
};
// Deliver message
match self
.email_ingest(IngestEmail {
raw_message: &sieve_message.raw_message,
message: message.into(),
resource: access_token.as_resource_token(),
mailbox_ids: sieve_message.file_into,
keywords: sieve_message.flags,
received_at: None,
source: IngestSource::Smtp {
deliver_to: envelope_to,
},
spam_classify: access_token.has_permission(Permission::SpamFilterClassify),
spam_train: can_spam_train,
session_id,
})
.await
{
Ok(ingested_message_) => {
has_delivered = true;
ingested_message = ingested_message_;
}
// Remember the failure and keep going with the remaining messages.
Err(err) => {
last_temp_error = err.into();
}
}
}
}
// A reject takes precedence over any delivery outcome. Otherwise the call
// succeeds if at least one message was delivered or nothing failed.
if let Some(reject_reason) = reject_reason {
Err(
trc::EventType::MessageIngest(trc::MessageIngestEvent::Error)
.ctx(trc::Key::Code, 571)
.ctx(trc::Key::Reason, reject_reason),
)
} else if has_delivered || last_temp_error.is_none() {
Ok(ingested_message)
} else {
// There were problems during delivery
#[allow(clippy::unnecessary_unwrap)]
Err(last_temp_error.unwrap())
}
}
async fn sieve_script_get_active(&self, account_id: u32) -> trc::Result<Option<ActiveScript>> {
// Find the currently active script
if let Some(document_id) = self
.store()
.filter(
account_id,
Collection::SieveScript,
vec![Filter::eq(Property::IsActive, vec![1u8])],
)
.await
.caused_by(trc::location!())?
.results
.min()
{
let script = self.sieve_script_compile(account_id, document_id).await?;
Ok(Some(ActiveScript {
document_id,
script: Arc::new(script.script),
script_name: script.name,
hash: script.hash,
}))
} else {
Ok(None)
}
}
/// Returns the compiled Sieve script called `name` owned by `account_id`.
///
/// Queries the `SieveScript` collection by name and compiles the matching
/// document with the lowest id. Yields `Ok(None)` when nothing matches.
async fn sieve_script_get_by_name(
    &self,
    account_id: u32,
    name: &str,
) -> trc::Result<Option<Sieve>> {
    // Lowest document id among the scripts carrying this name, if any.
    let first_match = self
        .store()
        .filter(
            account_id,
            Collection::SieveScript,
            vec![Filter::eq(Property::Name, name.serialize())],
        )
        .await
        .caused_by(trc::location!())?
        .results
        .min();

    match first_match {
        Some(document_id) => {
            let compiled = self.sieve_script_compile(account_id, document_id).await?;
            Ok(Some(compiled.script))
        }
        None => Ok(None),
    }
}
/// Loads the compiled form of the Sieve script stored as `document_id`.
///
/// The script blob holds the script source (its first `size` bytes) followed
/// by a serialized compiled script. When that trailing part deserializes, it
/// is returned directly. Otherwise the source is compiled again, a new blob
/// (source plus fresh compiled bytes) is stored, and the script object is
/// updated to link to it.
///
/// # Errors
///
/// * `StoreEvent::NotFound` when the script object or its blob is missing, or
///   when the blob is shorter than the recorded source size.
/// * `StoreEvent::UnexpectedError` when the source fails to compile.
/// * Archive, blob and store errors are propagated.
#[allow(clippy::blocks_in_conditions)]
async fn sieve_script_compile(
    &self,
    account_id: u32,
    document_id: u32,
) -> trc::Result<CompiledScript> {
    // Fetch the archived script object.
    let archive = self
        .get_archive(account_id, Collection::SieveScript, document_id)
        .await?
        .ok_or_else(|| {
            trc::StoreEvent::NotFound
                .into_err()
                .caused_by(trc::location!())
                .document_id(document_id)
        })?;
    let hash = archive.hash;
    let stored = archive
        .unarchive::<SieveScript>()
        .caused_by(trc::location!())?;

    // Length of the script source, i.e. where the compiled part begins.
    let source_len = u32::from(stored.size) as usize;

    // Fetch the whole blob (source followed by the compiled script).
    let blob = self
        .core
        .storage
        .blob
        .get_blob(stored.blob_hash.0.as_ref(), 0..usize::MAX)
        .await
        .caused_by(trc::location!())?
        .ok_or_else(|| {
            trc::StoreEvent::NotFound
                .into_err()
                .caused_by(trc::location!())
                .document_id(document_id)
        })?;

    // Fast path: the precompiled script is still readable.
    let precompiled = blob.get(source_len..).and_then(|tail| {
        <UnversionedArchive<AlignedBytes> as Deserialize>::deserialize(tail)
            .ok()?
            .deserialize::<Sieve>()
            .ok()
    });
    if let Some(script) = precompiled {
        return Ok(CompiledScript {
            script,
            name: stored.name.as_str().into(),
            hash,
        });
    }

    // Deserialization failed, probably because the script compiler version
    // changed: compile the source again.
    let source = blob.get(0..source_len).ok_or_else(|| {
        trc::StoreEvent::NotFound
            .into_err()
            .caused_by(trc::location!())
            .document_id(document_id)
    })?;
    let sieve = match self.core.sieve.untrusted_compiler.compile(source) {
        Ok(sieve) => UnversionedArchiver::new(sieve),
        Err(error) => {
            return Err(trc::StoreEvent::UnexpectedError
                .caused_by(trc::location!())
                .reason(error)
                .details("Failed to compile Sieve script"));
        }
    };

    // Assemble the replacement blob: unchanged source + fresh compiled bytes.
    let compiled_bytes = sieve.serialize().caused_by(trc::location!())?;
    let mut updated_blob = Vec::with_capacity(source_len + compiled_bytes.len());
    updated_blob.extend_from_slice(source);
    updated_blob.extend_from_slice(&compiled_bytes);

    // Store updated blob
    let new_blob_hash = self
        .put_blob(account_id, &updated_blob, false)
        .await?
        .hash;

    // Point a copy of the script object at the new blob, keeping the old
    // blob hash so its link can be cleared.
    let mut refreshed = rkyv::deserialize(stored).caused_by(trc::location!())?;
    let old_blob_hash = std::mem::replace(&mut refreshed.blob_hash, new_blob_hash.clone());
    let refreshed_archive = Archiver::new(refreshed);

    // Update script object, asserting it has not changed in the meantime.
    let mut batch = BatchBuilder::new();
    batch
        .with_account_id(account_id)
        .with_collection(Collection::SieveScript)
        .update_document(document_id)
        .assert_value(Property::Value, &archive)
        .set(
            Property::Value,
            refreshed_archive.serialize().caused_by(trc::location!())?,
        )
        .clear(BlobOp::Link {
            hash: old_blob_hash,
        })
        .set(
            BlobOp::Link {
                hash: new_blob_hash,
            },
            Vec::new(),
        );
    self.store()
        .write(batch.build_all())
        .await
        .caused_by(trc::location!())?;

    Ok(CompiledScript {
        script: sieve.into_inner(),
        name: refreshed_archive.into_inner().name,
        hash,
    })
}
}
/// A compiled Sieve script together with the metadata read from its stored
/// script object, as returned by `sieve_script_compile`.
pub struct CompiledScript {
/// The compiled script.
pub script: Sieve,
/// Name recorded in the stored script object.
pub name: String,
/// Hash value taken from the stored script archive; for the active script
/// it is passed on to `SeenIdHash` when handling duplicate-id checks.
pub hash: u32,
}