From 4975c8906642478ad10139ebe5706b57f58cdca8 Mon Sep 17 00:00:00 2001 From: Bohdan Shtepan Date: Wed, 11 Jun 2025 10:53:34 +0200 Subject: [PATCH] SL abuser system improvements (#2494) * IDTEAM-4828: Allow to search abuser by user id not just by email. * IDTEAM-4827: Add abuser admin logs to abuser page. * IDTEAM-4826: Run the abuser bundle generation in a job with low prio. --- app/abuser.py | 22 ++++ app/abuser_utils.py | 22 +--- app/admin_model.py | 52 +++++++-- app/constants.py | 1 + app/jobs/mark_abuser_job.py | 38 +++++++ job_runner.py | 5 + oneshot/backfill_abuser_data.py | 2 +- templates/admin/abuser_lookup.html | 162 ++++++++++++++++++----------- tests/test_abuser_utils.py | 36 +++---- 9 files changed, 236 insertions(+), 104 deletions(-) create mode 100644 app/abuser.py create mode 100644 app/jobs/mark_abuser_job.py diff --git a/app/abuser.py b/app/abuser.py new file mode 100644 index 00000000..fa94472e --- /dev/null +++ b/app/abuser.py @@ -0,0 +1,22 @@ +from typing import Optional + +from app.abuser_audit_log_utils import emit_abuser_audit_log, AbuserAuditLogAction +from app.db import Session +from app.jobs.mark_abuser_job import MarkAbuserJob +from app.models import User + + +def mark_user_as_abuser( + abuse_user: User, note: str, admin_id: Optional[int] = None +) -> None: + abuse_user.disabled = True + + emit_abuser_audit_log( + user_id=abuse_user.id, + action=AbuserAuditLogAction.MarkAbuser, + message=note, + admin_id=admin_id, + ) + job = MarkAbuserJob(user=abuse_user) + job.store_job_in_db() + Session.commit() diff --git a/app/abuser_utils.py b/app/abuser_utils.py index e0cdcb52..7dc40d0e 100644 --- a/app/abuser_utils.py +++ b/app/abuser_utils.py @@ -51,22 +51,7 @@ def check_if_abuser_email(new_address: str) -> Optional[AbuserLookup]: ) -def mark_user_as_abuser( - abuse_user: User, note: str, admin_id: Optional[int] = None -) -> None: - abuse_user.disabled = True - - emit_abuser_audit_log( - user_id=abuse_user.id, - action=AbuserAuditLogAction.MarkAbuser, - message=note, - admin_id=admin_id, - ) - Session.commit() - _store_abuse_data(abuse_user) - - -def _store_abuse_data(user: User) -> None: +def store_abuse_data(user: User) -> None: """ Archive the given abusive user's data and update blocklist/lookup tables. """ @@ -133,7 +118,7 @@ def _store_abuse_data(user: User) -> None: mac_key_bytes = config.MAC_KEY master_key_bytes = config.MASTER_ENC_KEY - for raw_identifier_address in all_identifiers_raw: + for idx, raw_identifier_address in enumerate(all_identifiers_raw): if not raw_identifier_address: continue @@ -168,6 +153,9 @@ def _store_abuse_data(user: User) -> None: Session.add(abuser_lookup_entry) + if idx % 1000 == 0: + Session.flush() + Session.commit() except Exception: Session.rollback() diff --git a/app/admin_model.py b/app/admin_model.py index a3d0bcf8..1ed232ea 100644 --- a/app/admin_model.py +++ b/app/admin_model.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json +from datetime import datetime from typing import Optional, List, Dict import arrow @@ -15,8 +17,9 @@ from flask_login import current_user from markupsafe import Markup from app import models, s3, config +from app.abuser_audit_log_utils import AbuserAuditLog +from app.abuser import mark_user_as_abuser from app.abuser_utils import ( - mark_user_as_abuser, unmark_as_abusive_user, get_abuser_bundles_for_address, ) @@ -56,8 +59,6 @@ from app.newsletter_utils import send_newsletter_to_user, send_newsletter_to_add from app.proton.proton_unlink import perform_proton_account_unlink from app.user_audit_log_utils import emit_user_audit_log, UserAuditLogAction from app.utils import sanitize_email -from datetime import datetime -import json def _admin_action_formatter(view, context, model, name): @@ -1114,19 +1115,42 @@ class CustomDomainSearchAdmin(BaseView): class AbuserLookupResult: def __init__(self): self.no_match: bool = False - self.email: Optional[str] = None + self.query: Optional[str | int] = None self.bundles: Optional[List[Dict]] = None + self.audit_log: Optional[List[Dict]] = None @staticmethod - def from_email(email: Optional[str]) -> AbuserLookupResult: + def from_email_or_user_id(query: str) -> AbuserLookupResult: out = AbuserLookupResult() + email: str + audit_log: List[AbuserAuditLog] = [] - if email is None or email == "": + if query is None or query == "": out.no_match = True return out - out.email = email + if query.isnumeric(): + user_id = int(query) + user = User.get(user_id) + + if not user: + out.no_match = True + + return out + + email = user.email + audit_log = AbuserAuditLog.filter(AbuserAuditLog.user_id == user.id).all() + else: + email = sanitize_email(query) + user = User.get_by(email=email) + + if user: + audit_log = AbuserAuditLog.filter( + AbuserAuditLog.user_id == user.id + ).all() + + out.query = query bundles = get_abuser_bundles_for_address( target_address=email, admin_id=current_user.id, @@ -1153,6 +1177,15 @@ class AbuserLookupResult: AbuserLookupResult.convert_dt(alias_item) out.bundles = bundles + out.audit_log = [ + { + "admin_id": alog.admin_id, + "action": alog.action, + "message": alog.message, + "created_at": alog.created_at, + } + for alog in audit_log + ] return out @@ -1174,13 +1207,12 @@ class AbuserLookupAdmin(BaseView): @expose("/", methods=["GET", "POST"]) def index(self): - query = request.args.get("email") + query: Optional[str] = request.args.get("search") if query is None: result = AbuserLookupResult() else: - email = sanitize_email(query) - result = AbuserLookupResult.from_email(email) + result = AbuserLookupResult.from_email_or_user_id(query) return self.render( "admin/abuser_lookup.html", diff --git a/app/constants.py b/app/constants.py index fa7fe0b5..c3fb132a 100644 --- a/app/constants.py +++ b/app/constants.py @@ -19,3 +19,4 @@ class JobType(enum.Enum): SEND_ALIAS_CREATION_EVENTS = "send-alias-creation-events" SEND_EVENT_TO_WEBHOOK = "send-event-to-webhook" SYNC_SUBSCRIPTION = "sync-subscription" + ABUSER_MARK = "abuser-mark" diff --git a/app/jobs/mark_abuser_job.py b/app/jobs/mark_abuser_job.py new file mode 100644 index 00000000..fd71e87c --- /dev/null +++ b/app/jobs/mark_abuser_job.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import Optional + +import arrow + +from app.abuser_utils import store_abuse_data +from app.constants import JobType +from app.models import ( + User, + Job, + JobPriority, +) + + +class MarkAbuserJob: + def __init__(self, user: User): + self._user: User = user + + def run(self) -> None: + store_abuse_data(user=self._user) + + @staticmethod + def create_from_job(job: Job) -> Optional[MarkAbuserJob]: + user = User.get(job.payload["user_id"]) + if not user: + return None + + return MarkAbuserJob(user) + + def store_job_in_db(self) -> Job: + return Job.create( + name=JobType.ABUSER_MARK.value, + payload={"user_id": self._user.id}, + priority=JobPriority.Low, + run_at=arrow.now(), + commit=True, + ) diff --git a/job_runner.py b/job_runner.py index 36b763d9..6be5036b 100644 --- a/job_runner.py +++ b/job_runner.py @@ -23,6 +23,7 @@ from app.events.event_dispatcher import PostgresDispatcher from app.import_utils import handle_batch_import from app.jobs.event_jobs import send_alias_creation_events_for_user from app.jobs.export_user_data_job import ExportUserDataJob +from app.jobs.mark_abuser_job import MarkAbuserJob from app.jobs.send_event_job import SendEventToWebhookJob from app.jobs.sync_subscription_job import SyncSubscriptionJob from app.log import LOG @@ -291,6 +292,10 @@ def process_job(job: Job): sync_job = SyncSubscriptionJob.create_from_job(job) if sync_job: sync_job.run(HttpEventSink()) + elif job.name == JobType.ABUSER_MARK.value: + mark_abuser_job = MarkAbuserJob.create_from_job(job) + if mark_abuser_job: + mark_abuser_job.run() else: LOG.e("Unknown job name %s", job.name) diff --git a/oneshot/backfill_abuser_data.py b/oneshot/backfill_abuser_data.py index 59b07a2b..4589b1f5 100644 --- a/oneshot/backfill_abuser_data.py +++ b/oneshot/backfill_abuser_data.py @@ -4,7 +4,7 @@ from typing import List from sqlalchemy import func -from app.abuser_utils import mark_user_as_abuser +from app.abuser import mark_user_as_abuser from app.db import Session from app.models import User, AbuserData diff --git a/templates/admin/abuser_lookup.html b/templates/admin/abuser_lookup.html index 84d7a008..f58c794e 100644 --- a/templates/admin/abuser_lookup.html +++ b/templates/admin/abuser_lookup.html @@ -4,50 +4,50 @@
Overview
- - - - - + + + + + - - - - - + + + + +
User IDPrimary email addressUser created
User IDPrimary email addressUser created
- {% if bundle.get('user', None) %} - {{ bundle.get('user').id }} - {% else %} - {{ bundle.get('account_id') }} - {% endif %} - - {% if bundle.get('user', None) %} - {{ bundle.get('user').email }} - {% else %} - {{ bundle.get('email', '') }} - {% endif %} - {{ bundle.get('user_created_at', '').strftime('%B %d, %Y %I:%M %p') }}
+ {% if bundle.get('user', None) %} + {{ bundle.get('user').id }} + {% else %} + {{ bundle.get('account_id') }} + {% endif %} + + {% if bundle.get('user', None) %} + {{ bundle.get('user').email }} + {% else %} + {{ bundle.get('email', '') }} + {% endif %} + {{ bundle.get('user_created_at', '').strftime('%B %d, %Y %I:%M %p') }}
{%- endmacro %} {% macro show_emails_table(emails) -%} - - - - - + + + + + - {% for idx, email in emails|enumerate %} - - - - - - {% endfor %} + {% for idx, email in emails|enumerate %} + + + + + + {% endfor %}
#Email addressDate created
#Email addressDate created
{{ idx + 1 }}{{ email.get('address', '') }}{{ email.get('created_at', '').strftime('%B %d, %Y %I:%M %p') }}
{{ idx + 1 }}{{ email.get('address', '') }}{{ email.get('created_at', '').strftime('%B %d, %Y %I:%M %p') }}
{%- endmacro %} @@ -63,7 +63,10 @@ {{ show_emails_table(bundle.get('mailboxes', [])) }} {% endif %}
- +

{%- endmacro %} @@ -76,31 +79,74 @@ {% endfor %} {%- endmacro %} +{% macro show_audit_log(audit_log) -%} + {% if audit_log and audit_log|length > 0 %} +

+ +

+
+
+
+
+ + + + + + + + + + + + {% for idx, log in audit_log|enumerate %} + + + + + + + + {% endfor %} + +
#ActionMessageDate createdAdmin User ID
{{ idx + 1 }} + {{ log.get('action', '') }} + {{ log.get('message', '') }}{{ log.get('created_at', '').strftime('%B %d, %Y %I:%M %p') }} + {{ log.admin_id }} +
+
+
+
+
+
+ {% endif %} +{%- endmacro %} {% block body %} -
-
-
- - -
- -
-
- {% if data.no_match and query %} - - - {% endif %} - {% if data.bundles %} -
-

Found abuser data for {{ data.email }}

- {{ show_bundles(data.bundles) }} +
+
+ + +
+ +
+
+ {% if data.no_match and query %} + + {% endif %} + {% if data.bundles %} +
+

Found abuser data for {{ data.query }}

+ {{ show_audit_log(data.audit_log) }} + {{ show_bundles(data.bundles) }} +
+ {% endif %} - {% endif %} - {% endblock %} diff --git a/tests/test_abuser_utils.py b/tests/test_abuser_utils.py index 1546ff2b..036524b8 100644 --- a/tests/test_abuser_utils.py +++ b/tests/test_abuser_utils.py @@ -7,14 +7,14 @@ from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import hashes as crypto_hashes from cryptography.hazmat.primitives.kdf.hkdf import HKDF +from app import constants from app.abuser_utils import ( - mark_user_as_abuser, check_if_abuser_email, get_abuser_bundles_for_address, unmark_as_abusive_user, _derive_key_for_identifier, + store_abuse_data, ) -from app import constants from app.db import Session from app.models import AbuserLookup, AbuserData, Alias, Mailbox, User from tests.utils import random_email, create_new_user @@ -136,7 +136,7 @@ def test_archive_basic_user(flask_client, monkeypatch): mailbox1_email_normalized = mailbox1.email.lower() Session.commit() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data = AbuserData.filter_by(user_id=user.id).first() assert ab_data is not None @@ -180,7 +180,7 @@ def test_archive_user_with_no_aliases_or_mailboxes(flask_client, monkeypatch): user_primary_email_normalized = user.email.lower() Alias.filter_by(user_id=user.id).delete(synchronize_session=False) Session.commit() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data = AbuserData.filter_by(user_id=user.id).first() assert ab_data is not None @@ -224,7 +224,7 @@ def test_duplicate_addresses_do_not_create_duplicate_lookups(flask_client, monke default_mb.email = duplicate_email_normalized Session.add(default_mb) Session.commit() - mark_user_as_abuser(user, "") + store_abuse_data(user) identifier_hmac_duplicate = calculate_hmac(duplicate_email_normalized) ab_data = AbuserData.filter_by(user_id=user.id).first() @@ -253,7 +253,7 @@ def test_invalid_user_or_identifier_fails_gracefully(flask_client, monkeypatch): with pytest.raises( ValueError, match=f"User ID {user_obj_no_email.id} must have a primary email" ): - mark_user_as_abuser(user_obj_no_email, "") + store_abuse_data(user_obj_no_email) def test_can_decrypt_bundle_for_all_valid_identifiers(flask_client, monkeypatch): @@ -283,7 +283,7 @@ def test_can_decrypt_bundle_for_all_valid_identifiers(flask_client, monkeypatch) mailbox1_email_normalized = mailbox1.email.lower() Session.commit() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data = AbuserData.filter_by(user_id=user.id).first() assert ab_data is not None @@ -318,7 +318,7 @@ def test_db_rollback_on_error(monkeypatch, flask_client): monkeypatch.setattr(Session, "commit", mock_commit_failure) with pytest.raises(RuntimeError, match="Simulated DB failure during commit"): - mark_user_as_abuser(user, "") + store_abuse_data(user) monkeypatch.setattr(Session, "commit", original_commit) # Restore Session.rollback() @@ -333,7 +333,7 @@ def test_db_rollback_on_error(monkeypatch, flask_client): def test_unarchive_abusive_user_removes_data(flask_client, monkeypatch): user = create_new_user() email_normalized = user.email.lower() - mark_user_as_abuser(user, "") + store_abuse_data(user) assert AbuserData.filter_by(user_id=user.id).first() is not None assert get_lookup_count_for_address(email_normalized) > 0 @@ -357,7 +357,7 @@ def test_unarchive_idempotent_on_missing_data(flask_client, monkeypatch): def test_abuser_data_deletion_cascades_to_lookup(flask_client, monkeypatch): user = create_new_user() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data = AbuserData.filter_by(user_id=user.id).first() assert ab_data is not None @@ -374,7 +374,7 @@ def test_abuser_data_deletion_cascades_to_lookup(flask_client, monkeypatch): def test_archive_then_unarchive_then_rearchive_is_consistent(flask_client, monkeypatch): user = create_new_user() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data1 = AbuserData.filter_by(user_id=user.id).first() assert ab_data1 is not None @@ -383,7 +383,7 @@ def test_archive_then_unarchive_then_rearchive_is_consistent(flask_client, monke assert AbuserData.filter_by(user_id=user.id).first() is None - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data2 = AbuserData.filter_by(user_id=user.id).first() assert ab_data2 is not None @@ -393,7 +393,7 @@ def test_archive_then_unarchive_then_rearchive_is_consistent(flask_client, monke def test_get_abuser_bundles_returns_bundle_for_primary_email(flask_client, monkeypatch): user = create_new_user() email_normalized = user.email.lower() - mark_user_as_abuser(user, "") + store_abuse_data(user) bundles = get_abuser_bundles_for_address(email_normalized, -1) assert len(bundles) == 1 @@ -429,7 +429,7 @@ def test_get_abuser_bundles_from_alias_address(flask_client, monkeypatch): mailbox_id=user.default_mailbox_id, commit=True, ) - mark_user_as_abuser(user, "") + store_abuse_data(user) results = get_abuser_bundles_for_address(alias_email_normalized, -1) assert len(results) == 1 @@ -469,7 +469,7 @@ def test_get_abuser_bundles_from_mailbox_address(flask_client, monkeypatch): Session.commit() current_mailbox_email_normalized = mailbox.email.lower() - mark_user_as_abuser(user, "") + store_abuse_data(user) results = get_abuser_bundles_for_address(current_mailbox_email_normalized, -1) @@ -488,7 +488,7 @@ def test_get_abuser_bundles_with_corrupt_encrypted_k_bundle_is_skipped( flask_client, monkeypatch ): user = create_new_user() - mark_user_as_abuser(user, "") + store_abuse_data(user) identifier_hmac = calculate_hmac(user.email) lookup_entry = AbuserLookup.filter_by(hashed_address=identifier_hmac).first() @@ -512,7 +512,7 @@ def test_get_abuser_bundles_with_corrupt_main_bundle_is_skipped( flask_client, monkeypatch ): user = create_new_user() - mark_user_as_abuser(user, "") + store_abuse_data(user) ab_data = AbuserData.filter_by(user_id=user.id).first() assert ab_data is not None @@ -550,7 +550,7 @@ def test_archive_and_fetch_flow_end_to_end(flask_client, monkeypatch): Session.commit() current_mailbox_email_normalized = mailbox.email.lower() - mark_user_as_abuser(user, "") + store_abuse_data(user) bundles = get_abuser_bundles_for_address(user.email, -1) assert len(bundles) == 1