From f5d0e56b1bc44b438682689382fc4fde348e4018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bojan=20=C4=8Cekrli=C4=87?= Date: Mon, 28 Mar 2022 19:42:56 +0200 Subject: [PATCH] New: Add smart email anonymizer This email anonymizer tries to be a bit smarter about how it goes about anonymizing email addresses, by providing as much information as possible while still making sure to respect the user's privacy. More info available in `README.md`. --- README.md | 22 ++++++++--- scripts/email-anonymizer.py | 2 +- unit-tests/email-anonymizer-smart.bats | 54 +++++++++++++------------- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 4d02be4..c938b7c 100644 --- a/README.md +++ b/README.md @@ -364,7 +364,11 @@ E.g.: * `s@[192.168.8.10]` -> `s*s@[*.*.*.*]` * `"multi....dot"@[IPv6:2001:db8:85a3:8d3:1319:8a2e:370:7348]` -> `"m*t"@[IPv6:***********]` -Configure the symbol by providing the optional parameter, e.g.: `ANONYMIZE_EMAILS=smart?mask_symbol=#` +Configuration parameters: + +| Property | Default value | Required | Description | +|------------------|---------------|----------|-------------| +| `mask_symbol` | `*` | no | Mask symbol to use instead of replaced characters | ##### The `paranoid` filter @@ -381,9 +385,11 @@ E.g.: * `s@[192.168.8.10]` -> `*@[*]` * `"multi....dot"@[IPv6:2001:db8:85a3:8d3:1319:8a2e:370:7348]` -> `*@[IPv6:*]` -##### The `noop` filter +Configuration parameters: -This filter doesn't do anything. It's used for testing purposes only. 
+| Property | Default value | Required | Description | +|------------------|---------------|----------|-------------| +| `mask_symbol` | `*` | no | Mask symbol to use instead of replaced characters | ##### The `hash` filter @@ -394,9 +400,9 @@ E.g.: * `prettyandsimple@example.com` -> `<3052a860ddfde8b50e39843d8f1c9f591bec442823d97948b811d38779e2c757>` for (`ANONYMIZE_EMAILS=hash?salt=hello%20world`) * `prettyandsimple@example.com` -> `c58731d3@8bd7a35c` for (`ANONYMIZE_EMAILS=hash?salt=hello%20world&split=true&short_sha=t&prefix=&suffix=`) -Filter will not work without configuration. You will need to provide (at least) the salt, e.g.: +Filter will not work without configuration. You will need to provide (at least) the salt, e.g.: `ANONYMIZE_EMAILS=hash?salt=demo` -`ANONYMIZE_EMAILS=hash?salt=demo[&prefix=][&suffix=][&split=][&short_sha=][&case_sensitive=]` +Configuration parameters: | Property | Default value | Required | Description | |------------------|---------------|----------|-------------| @@ -407,9 +413,13 @@ Filter will not work without configuration. You will need to provide (at least) | `short_sha` | `false` | no | Set to `1`, `t` or `true` to return just the first 8 characters of the hash | | `case_sensitive` | `true` | no | Set to `0`, `f` or `false` to convert email to lowercase before hashing | +##### The `noop` filter + +This filter doesn't do anything. It's used for testing purposes only. + ##### Writting your own filters -It's easy enough to write your own filters. The simplest way would be to take the `email-anonymizer.py` filte in this +It's easy enough to write your own filters. The simplest way would be to take the `email-anonymizer.py` file in this image, write your own and then attach it to the container image under `/scripts`. If you're feeling adentorous, you can also install your own Python package -- the script will automatically pick up the class name. 
diff --git a/scripts/email-anonymizer.py b/scripts/email-anonymizer.py index 37277e4..4ca28d0 100644 --- a/scripts/email-anonymizer.py +++ b/scripts/email-anonymizer.py @@ -136,7 +136,7 @@ class SmartFilter(Filter): left, right = domain.split(":", 1) return left + ':' + (len(right)-1) * self.mask_symbol + ']' else: - return '[*.*.*.*]' + return '[' + self.mask_symbol + '.' + self.mask_symbol + '.' + self.mask_symbol + '.' + self.mask_symbol + ']' elif '.' in domain: # Normal domain s, tld = domain.rsplit('.', 1) return len(s) * self.mask_symbol + '.' + tld diff --git a/unit-tests/email-anonymizer-smart.bats b/unit-tests/email-anonymizer-smart.bats index bd64b7c..c80b839 100644 --- a/unit-tests/email-anonymizer-smart.bats +++ b/unit-tests/email-anonymizer-smart.bats @@ -29,31 +29,31 @@ Pelé@example.com EOF mapfile SMART <<'EOF' -p*e@*******.com -v*n@*******.com -d*l@*******.com -o*h@*******.com -x*x@*******.com -\"m*l\"@*******.com -\"v*m\"@*******.com -\"v*l\"@***************.com -e*d@***************.com -a*n@*********** -#*~@*******.org -\"(*a\"@*******.org -\" * \"@*******.org -e*e@********* -e*e@*.solutions -u*r@*** -u*r@*********** -u*r@[*.*.*.*] -u*r@[IPv6:***********] -P*é@*******.com -δ*ή@**********.δοκιμή -我*買@**.香港 -二*宮@**.日本 -м*ь@************.рф -स*क@*******.भारत +p#e@#######.com +v#n@#######.com +d#l@#######.com +o#h@#######.com +x#x@#######.com +\"m#l\"@#######.com +\"v#m\"@#######.com +\"v#l\"@###############.com +e#d@###############.com +a#n@########### +##~@#######.org +\"(#a\"@#######.org +\" # \"@#######.org +e#e@######### +e#e@#.solutions +u#r@### +u#r@########### +u#r@[#.#.#.#] +u#r@[IPv6:###########] +P#é@#######.com +δ#ή@##########.δοκιμή +我#買@##.香港 +二#宮@##.日本 +м#ь@############.рф +स#क@#######.भारत 20211207101128.0805BA272@31bfa77a2cab EOF @@ -67,7 +67,7 @@ EOF for index in "${!EMAILS[@]}"; do email="${EMAILS[$index]}" email=${email%$'\n'} # Remove trailing new line - result="$(echo "$email" | /code/scripts/email-anonymizer.sh smart)" + 
result="$(echo "$email" | /code/scripts/email-anonymizer.sh 'smart?mask_symbol=#')" result=${result%$'\n'} # Remove trailing new line expected="${SMART[$index]}" expected=${expected%$'\n'} # Remove trailing new line @@ -87,7 +87,7 @@ EOF for index in "${!MESSAGE_IDS[@]}"; do email="${MESSAGE_IDS[$index]}" email=${email%$'\n'} # Remove trailing new line - result="$(echo "$email" | /code/scripts/email-anonymizer.sh smart)" + result="$(echo "$email" | /code/scripts/email-anonymizer.sh 'smart?mask_symbol=#')" result=${result%$'\n'} # Remove trailing new line expected='{}' if [ "$result" != "$expected" ]; then