Add parseaddr_unicode()

This commit is contained in:
Son NK 2020-04-05 12:07:40 +02:00
parent 6258ef0c11
commit 5fff1e86ce
2 changed files with 31 additions and 1 deletions

View file

@ -1,4 +1,5 @@
import os
from email.header import decode_header
from email.message import Message
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
@ -432,3 +433,18 @@ def get_spam_info(msg: Message) -> (bool, str):
spamassassin_answer = spamassassin_status[: spamassassin_status.find(",")]
return spamassassin_answer.lower() == "yes", spamassassin_status
def parseaddr_unicode(addr) -> (str, str):
    """Like parseaddr, but return the display name as unicode text.

    The display name is decoded from its RFC 2047 "encoded-word" form, e.g.
    '=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>'
        -> ('Nhơn Nguyễn', "abcd@gmail.com")

    Args:
        addr: raw address header value, e.g. 'Name <user@example.com>'.

    Returns:
        A (name, email) tuple. ``email`` is lower-cased. ``name`` is the
        decoded display name, or an empty string when the address carries
        no display name.
    """
    name, email = parseaddr(addr)
    email = email.lower()

    # Always return a 2-tuple so callers can unpack the result even when
    # the address has no display name.
    if not name:
        return "", email

    # decode_header() may split the name into several chunks (e.g. an
    # encoded word followed by plain text); decode and join all of them
    # instead of keeping only the first one.
    pieces = []
    for chunk, charset in decode_header(name):
        if isinstance(chunk, bytes):
            try:
                # Chunks without a charset are the un-encoded parts of a
                # mixed header; treat them as ASCII.
                chunk = chunk.decode(charset or "ascii", errors="replace")
            except LookupError:
                # Unknown charset label: fall back to a lenient UTF-8 decode
                # rather than failing on the whole address.
                chunk = chunk.decode("utf-8", errors="replace")
        pieces.append(chunk)

    return "".join(pieces), email

View file

@ -6,7 +6,7 @@ from app.email_utils import (
can_be_used_as_personal_email,
delete_header,
add_or_replace_header,
)
parseaddr_unicode)
from app.extensions import db
from app.models import User, CustomDomain
@ -61,3 +61,17 @@ def test_add_or_replace_header():
add_or_replace_header(msg, "H", "new")
assert msg._headers == [("H", "new")]
def test_parseaddr_unicode():
    """parseaddr_unicode returns the decoded display name and the email."""
    expected_email = "abcd@gmail.com"
    cases = [
        # (raw address, expected display name)
        # ascii address
        ("First Last <abcd@gmail.com>", "First Last"),
        # Handle quote
        ('"First Last" <abcd@gmail.com>', "First Last"),
        # UTF-8 charset
        ("=?UTF-8?B?TmjGoW4gTmd1eeG7hW4=?= <abcd@gmail.com>", "Nhơn Nguyễn"),
        # iso-8859-1 charset
        ("=?iso-8859-1?q?p=F6stal?= <abcd@gmail.com>", "pöstal"),
    ]
    for raw_address, expected_name in cases:
        assert parseaddr_unicode(raw_address) == (expected_name, expected_email)