Fix POP mail parsing in multipart bounce e-mails.

This was originally authored by @stevesavanna in #707. This commit
contains changes and refactors that could not be pushed to the original PR.

Changes from #707

- Don't ignore bounce mails missing campaign / subscriber UUIDs. The
  original behaviour falls back to looking up subscribers by e-mail.
- Refactor repetitive header.get + regexp conditions per header into
  a simpler lookup map.
- Trim e-mail header values of `\r`.

Closes #707, #763

Co-authored-by: stevesavanna <steven@savannacorp.com>
This commit is contained in:
Kailash Nadh 2022-04-03 11:37:11 +05:30
parent a7145511fd
commit 73e4c1cf28
2 changed files with 74 additions and 27 deletions

View file

@ -2,7 +2,9 @@ package mailbox
import ( import (
"encoding/json" "encoding/json"
"io"
"regexp" "regexp"
"strings"
"time" "time"
"github.com/emersion/go-message" "github.com/emersion/go-message"
@ -17,9 +19,24 @@ type POP struct {
client *pop3.Client client *pop3.Client
} }
// bounceHeaders pairs an e-mail header name with the regexp that is used to
// find that header's value in the raw message when the parsed header is empty.
type bounceHeaders struct {
// Header is the name of the header to look up in the parsed message.
Header string
// Regexp is the fallback pattern; its first capture group holds the value.
Regexp *regexp.Regexp
}
var ( var (
reCampUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderCampaignUUID + `:\s+?)([a-z0-9\-]{36})`) // List of header to look for in the e-mail body, regexp to fall back to if the header is empty.
reSubUUID = regexp.MustCompile(`(?m)(?m:^` + models.EmailHeaderSubscriberUUID + `:\s+?)([a-z0-9\-]{36})`) headerLookups = []bounceHeaders{
{models.EmailHeaderCampaignUUID, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderCampaignUUID + `:\s+?)([a-z0-9\-]{36})`)},
{models.EmailHeaderSubscriberUUID, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderSubscriberUUID + `:\s+?)([a-z0-9\-]{36})`)},
{models.EmailHeaderDate, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderDate + `:\s+?)([\w,\,\ ,:,+,-]*(?:\(?:\w*\))?)`)},
{models.EmailHeaderFrom, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderFrom + `:\s+?)(.*)`)},
{models.EmailHeaderSubject, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderSubject + `:\s+?)(.*)`)},
{models.EmailHeaderMessageId, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderMessageId + `:\s+?)(.*)`)},
{models.EmailHeaderDeliveredTo, regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderDeliveredTo + `:\s+?)(.*)`)},
}
reHdrReceived = regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderReceived + `:\s+?)(.*)`)
) )
// NewPOP returns a new instance of the POP mailbox client. // NewPOP returns a new instance of the POP mailbox client.
@ -81,29 +98,51 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error {
return err return err
} }
// Check if the identifiers are available in the parsed message. h := m
var (
campUUID = m.Header.Get(models.EmailHeaderCampaignUUID)
subUUID = m.Header.Get(models.EmailHeaderSubscriberUUID)
)
// If they are not, try to extract them from the message body. // If this is a multipart message, find the last part.
if campUUID == "" { if mr := m.MultipartReader(); mr != nil {
if u := reCampUUID.FindSubmatch(b.Bytes()); len(u) == 2 { for {
campUUID = string(u[1]) part, err := mr.NextPart()
if err == io.EOF {
break
} else if err != nil {
return err
} }
} h = part
if subUUID == "" {
if u := reSubUUID.FindSubmatch(b.Bytes()); len(u) == 2 {
subUUID = string(u[1])
} }
} }
if campUUID == "" || subUUID == "" { // Reset the "unread portion" pointer of the message buffer.
continue // If you don't do this, you can't read the entire body because the pointer will not point to the beginning.
b, _ = c.RetrRaw(id)
// Lookup headers in the e-mail. If a header isn't found, fall back to regexp lookups.
hdr := make(map[string]string, 7)
for _, l := range headerLookups {
v := h.Header.Get(l.Header)
// Not in the header. Try regexp.
if v == "" {
if m := l.Regexp.FindAllSubmatch(b.Bytes(), -1); m != nil {
v = string(m[len(m)-1][1])
}
} }
date, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", m.Header.Get("Date")) hdr[l.Header] = strings.TrimSpace(v)
}
// Received is a []string header.
msgReceived := h.Header.Map()[models.EmailHeaderReceived]
if len(msgReceived) == 0 {
if u := reHdrReceived.FindAllSubmatch(b.Bytes(), -1); u != nil {
for i := 0; i < len(u); i++ {
msgReceived = append(msgReceived, string(u[i][1]))
}
}
}
date, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", hdr[models.EmailHeaderDate])
if date.IsZero() { if date.IsZero() {
date = time.Now() date = time.Now()
} }
@ -116,21 +155,21 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error {
DeliveredTo string `json:"delivered_to"` DeliveredTo string `json:"delivered_to"`
Received []string `json:"received"` Received []string `json:"received"`
}{ }{
From: m.Header.Get("From"), From: hdr[models.EmailHeaderFrom],
Subject: m.Header.Get("Subject"), Subject: hdr[models.EmailHeaderSubject],
MessageID: m.Header.Get("Message-Id"), MessageID: hdr[models.EmailHeaderMessageId],
DeliveredTo: m.Header.Get("Delivered-To"), DeliveredTo: hdr[models.EmailHeaderDeliveredTo],
Received: m.Header.Map()["Received"], Received: msgReceived,
}) })
select { select {
case ch <- models.Bounce{ case ch <- models.Bounce{
Type: "hard", Type: "hard",
CampaignUUID: campUUID, CampaignUUID: hdr[models.EmailHeaderCampaignUUID],
SubscriberUUID: subUUID, SubscriberUUID: hdr[models.EmailHeaderSubscriberUUID],
Source: p.opt.Host, Source: p.opt.Host,
CreatedAt: date, CreatedAt: date,
Meta: json.RawMessage(meta), Meta: meta,
}: }:
default: default:
} }

View file

@ -69,6 +69,14 @@ const (
EmailHeaderSubscriberUUID = "X-Listmonk-Subscriber" EmailHeaderSubscriberUUID = "X-Listmonk-Subscriber"
EmailHeaderCampaignUUID = "X-Listmonk-Campaign" EmailHeaderCampaignUUID = "X-Listmonk-Campaign"
// Standard e-mail headers.
EmailHeaderDate = "Date"
EmailHeaderFrom = "From"
EmailHeaderSubject = "Subject"
EmailHeaderMessageId = "Message-Id"
EmailHeaderDeliveredTo = "Delivered-To"
EmailHeaderReceived = "Received"
BounceTypeHard = "hard" BounceTypeHard = "hard"
BounceTypeSoft = "soft" BounceTypeSoft = "soft"
) )