Add email anonymizer option in case it is needed for GDPR

This commit is contained in:
Sergio Del Río Mayoral 2021-11-10 14:12:14 +01:00
parent 88c94f979a
commit cf38be511f
6 changed files with 185 additions and 0 deletions

View file

@ -64,6 +64,7 @@ COPY /configs/rsyslog*.conf /etc/
COPY /configs/opendkim.conf /etc/opendkim/opendkim.conf
COPY /configs/smtp_header_checks /etc/postfix/smtp_header_checks
COPY /scripts/*.sh /
COPY /scripts/*.py /
RUN chmod +x /run.sh /opendkim.sh

View file

@ -26,6 +26,7 @@ Simple postfix relay host ("postfix null client") for your Docker containers. Ba
* [POSTFIX_mynetworks](#postfix_mynetworks)
* [POSTFIX_message_size_limit](#postfix_message_size_limit)
* [Overriding specific postfix settings](#overriding-specific-postfix-settings)
* [ANON_EMAIL](#anon_email)
* [DKIM / DomainKeys](#dkim--domainkeys)
* [Supplying your own DKIM keys](#supplying-your-own-dkim-keys)
* [Auto-generating the DKIM selectors through the image](#auto-generating-the-dkim-selectors-through-the-image)
@ -331,6 +332,19 @@ Any Postfix [configuration option](http://www.postfix.org/postconf.5.html) can b
environment variables, e.g. `POSTFIX_allow_mail_to_commands=alias,forward,include`. Specifying no content (empty
variable) will remove that variable from postfix config.
#### ANON_EMAIL
Anonymize email addresses in Postfix logs. It masks the address by putting `*` in the middle of the username and the domain. Sample: `from=<a*****************s@a***********.com>`
The letters allowed to be unmasked can be set using env vars:
* `ANON_EMAIL_PREFIX`: First letters from the username (default 1).
* `ANON_EMAIL_SUFFIX`: Last letters from the username (default 1).
* `ANON_DOMAIN_PREFIX`: First letters from the domain to be unmasked (default 1).
* `ANON_DOMAIN_SUFFIX`: Last letters from the domain to be unmasked; with the default you will usually see something like `****.com` (default 4).
If the username is too short (`len(username) - ANON_EMAIL_PREFIX - ANON_EMAIL_SUFFIX < 2`), all of its letters are masked: the whole username is replaced by a single `*`.
### DKIM / DomainKeys
**This image is equipped with support for DKIM.** If you want to use DKIM you will need to generate DKIM keys. These can

View file

@ -41,11 +41,15 @@ template(name="plain" type="list") {
constant(value="\n")
}
#anon_email module(load="mmexternal")
if $syslogseverity <= '6' then {
# Do not log healthchecks
if ($msg contains "connect from localhost[127.0.0.1]") then { stop }
if ($msg contains "lost connection after EHLO from localhost[127.0.0.1]") then { stop }
if ($msg contains "disconnect from localhost[127.0.0.1] ehlo=1 commands=1") then { stop }
# email anonymizer
#anon_email action(type="mmexternal" binary="/anon_email.py ANON_EMAIL_PREFIX ANON_EMAIL_SUFFIX ANON_DOMAIN_PREFIX ANON_DOMAIN_SUFFIX" interface.input="msg" )
# matching logs will be saved
action(type="omfile" DynaFile="devicelog" template="<log-format>" DirCreateMode="0755" FileCreateMode="0644")
# enable below to stop processing further this log

143
scripts/anon_email.py Executable file
View file

@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""A message modification plugin to anonymize email address.
Based on rsyslog sample anon_cc_nbrs
https://github.com/rsyslog/rsyslog/tree/master/plugins/external/messagemod/anon_cc_nbrs
Copyright (C) 2021 by Sergio del Rio
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
-or-
see COPYING.ASL20 in the source distribution
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
import re
import json
import os
import sys
import logging
# App logic global variables
def onInit():
    """Initialize the plugin state before any message is processed.

    Compiles the e-mail regular expression and reads the four masking widths
    from the command line (``sys.argv[1]`` .. ``sys.argv[4]``): e-mail prefix,
    e-mail suffix, domain prefix and domain suffix.  An argument that is
    missing, is not an integer, or is negative falls back to its default
    (1, 1, 1 and 4 respectively).

    The results are stored in the module-level globals ``pattern``, ``rc``,
    ``email_prefix``, ``email_suffix``, ``domain_prefix`` and
    ``domain_suffix``.
    """
    global rc
    global pattern
    global email_prefix
    global email_suffix
    global domain_prefix
    global domain_suffix

    # Positions of the individual settings in sys.argv.
    ANON_EMAIL_PREFIX = 1
    ANON_EMAIL_SUFFIX = 2
    ANON_DOMAIN_PREFIX = 3
    ANON_DOMAIN_SUFFIX = 4

    # Raw string: "\." must reach the regex engine as-is; in a normal string
    # literal it is an invalid escape sequence and triggers a warning.
    pattern = r'([a-zA-Z0-9_.+-]+)@([a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)'  # email regexp

    # Compile the e-mail pattern directly.  Wrapping it as "()|(...)" adds an
    # alternative that matches the empty string at every position, so e-mails
    # would only be found through the regex engine's empty-match handling.
    rc = re.compile(pattern)

    def arg_or_default(var_index, default=1):
        """Return ``sys.argv[var_index]`` as a non-negative int, else *default*."""
        try:
            value = int(sys.argv[var_index])
        except (IndexError, ValueError):
            # Argument absent or not numeric (e.g. a placeholder name that
            # was never replaced by a number): use the default.
            return default
        return value if value >= 0 else default

    email_prefix = arg_or_default(ANON_EMAIL_PREFIX)
    email_suffix = arg_or_default(ANON_EMAIL_SUFFIX)
    domain_prefix = arg_or_default(ANON_DOMAIN_PREFIX)
    domain_suffix = arg_or_default(ANON_DOMAIN_SUFFIX, default=4)
def onReceive(msg):
    """Anonymize every e-mail address in *msg* and answer rsyslog on stdout.

    This is the entry point where the actual work is done.  It receives the
    message from rsyslog, masks the user and domain part of each e-mail
    address found in it, and prints the to-be-modified properties as a single
    JSON line: ``{"msg": <masked text>}`` when something changed, or the
    empty object ``{}`` when no field is to be modified.

    Note that no batching takes place (contrary to the output module
    skeleton), so each message needs to be fully processed; rsyslog waits
    for the reply before the next message is pushed to this module.

    Reads the module-level globals ``rc``, ``pattern``, ``email_prefix``,
    ``email_suffix``, ``domain_prefix`` and ``domain_suffix``.
    """

    def anonymize_word(word, prefix=1, suffix=1):
        """Keep *prefix* leading and *suffix* trailing characters, mask the rest."""
        # If the word is too short (fewer than two characters would be
        # hidden), anonymize everything with a single '*'.
        if len(word) - prefix - suffix < 2:
            return '*'
        tail_start = len(word) - suffix
        return ''.join(
            char if (idx < prefix or idx >= tail_start) else '*'
            for idx, char in enumerate(word)
        )

    def lookup(match):
        """Return the masked replacement for one regex match."""
        found = match.group(0)
        res = re.match(pattern, found)
        if res is None:
            # Not an e-mail address (e.g. an empty match): a replacement
            # callback must return a string, so hand the text back unchanged.
            return found
        mail_user = str(res.group(1))
        mail_domain = str(res.group(2))
        return (
            anonymize_word(mail_user, prefix=email_prefix, suffix=email_suffix)
            + '@'
            + anonymize_word(mail_domain, prefix=domain_prefix, suffix=domain_suffix)
        )

    res_msg = rc.sub(lookup, msg)
    if res_msg == msg:
        print(json.dumps({}))
    else:
        print(json.dumps({'msg': res_msg}))
def onExit():
    """Shutdown hook called once after the input loop ends; does nothing."""
    return None
"""
-------------------------------------------------------
This is plumbing that DOES NOT need to be CHANGED
-------------------------------------------------------
Implementor's note: Python seems to very aggressively
buffer stdout. The end result was that rsyslog does not
receive the script's messages in a timely manner (sometimes
even never, probably due to races). To prevent this, we
flush stdout after we have done processing. This is especially
important once we get to the point where the plugin does
two-way conversations with rsyslog. Do NOT change this!
See also: https://github.com/rsyslog/rsyslog/issues/22
"""
# Main loop: initialize once, then answer one message per input line until
# stdin is closed.
onInit()
keepRunning = 1
while keepRunning == 1:
    msg = sys.stdin.readline()
    if msg:
        # Remove the trailing LF, but only when one is present: a final line
        # without a newline must not lose its last character.
        if msg.endswith("\n"):
            msg = msg[:-1]
        onReceive(msg)
        sys.stdout.flush()  # very important, Python buffers far too much!
    else:  # readline() returns an empty string only when stdin has been closed
        keepRunning = 0
onExit()
sys.stdout.flush()  # very important, Python buffers far too much!

View file

@ -28,6 +28,28 @@ rsyslog_log_format() {
sed -i -E "s/<log-format>/${log_format}/" /etc/rsyslog.conf
}
# Configure the optional e-mail anonymizer in /etc/rsyslog.conf.
#
# When ANON_EMAIL contains "yes" or "true" (case-insensitive), the lines
# prefixed with "#anon_email" are enabled by removing that prefix.
# For each of ANON_EMAIL_PREFIX, ANON_EMAIL_SUFFIX, ANON_DOMAIN_PREFIX and
# ANON_DOMAIN_SUFFIX that is set to a non-negative integer, the placeholder of
# the same name in /etc/rsyslog.conf is replaced by that value.
anon_email_log() {
	local anon_email="${ANON_EMAIL:-}"
	local var
	local value

	# Same substring, case-insensitive test as before, without spawning egrep.
	if [[ "${anon_email,,}" =~ (yes|true) ]]; then
		info "Using anonymizer email script"
		sed -i -E "s/^#anon_email//g" /etc/rsyslog.conf
	fi

	for var in ANON_EMAIL_PREFIX ANON_EMAIL_SUFFIX ANON_DOMAIN_PREFIX ANON_DOMAIN_SUFFIX; do
		[[ -v "${var}" ]] || continue
		value="${!var}"
		# Only substitute plain non-negative integers. An empty value would
		# delete the placeholder and shift the remaining positional arguments
		# of the anonymizer script; a value containing "/" or "&" would
		# break or alter the sed expression. Leaving the placeholder in
		# place keeps the argument position intact.
		if [[ "${value}" =~ ^[0-9]+$ ]]; then
			sed -i -E "s/${var}/${value}/g" /etc/rsyslog.conf
		else
			info "Ignoring ${var}='${value}': expected a non-negative integer"
		fi
	done
}
setup_conf() {
local srcfile
local dstfile

View file

@ -7,6 +7,7 @@ set -e
announce_startup # Print startup banner
setup_timezone # Check if we need to configure the container timezone
rsyslog_log_format # Setup rsyslog output format
anon_email_log # Setup email anonymizer
setup_conf # Copy over files from /etc/postfix.template to /etc/postfix, if the user mounted the folder manually
reown_folders # Make and reown /var/spool/postfix/ folders
postfix_upgrade_conf # Upgrade old configuration, replace "hash:" and "btree:" databases to "lmdb:"