Add start and end flags to parallelize call (#2033)

This commit is contained in:
Adrià Casajús 2024-02-19 16:46:35 +01:00 committed by GitHub
parent 50c130a3a3
commit d0a6b8ed79
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,21 +1,44 @@
#!/usr/bin/env python3
"""Backfill ``alias.last_email_log_id`` from the ``email_log`` table.

For every alias id in the requested range, the highest ``email_log.id``
recorded for that alias is written to ``alias.last_email_log_id``.  The
``-s``/``-e`` flags restrict the run to a slice of alias ids so several
copies of the script can work on different slices at the same time.
"""
import argparse
import time

# Number of alias ids scanned per SELECT.
step = 1000
el_query = "SELECT alias_id, MAX(id) from email_log where alias_id>=:start AND alias_id < :end GROUP BY alias_id"
alias_query = "UPDATE alias set last_email_log_id = :el_id where id = :alias_id"


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``start_alias_id`` and ``end_alias_id`` (both ints,
        both defaulting to 0).
    """
    parser = argparse.ArgumentParser(
        prog="Backfill alias", description="Backfill alias las use"
    )
    parser.add_argument(
        "-s", "--start_alias_id", default=0, type=int, help="Initial alias_id"
    )
    parser.add_argument(
        "-e", "--end_alias_id", default=0, type=int, help="Last alias_id"
    )
    return parser.parse_args(argv)


def batch_bounds(first_id, last_id, size=step):
    """Yield half-open ``(start, end)`` windows covering first_id..last_id.

    ``last_id`` is inclusive: the final window is clamped to ``last_id + 1``
    so the last id is always visited and no window reaches past it.
    Yields nothing when ``first_id > last_id``.
    """
    stop = last_id + 1
    for start in range(first_id, stop, size):
        yield start, min(start + size, stop)


def estimate_hours_remaining(elapsed, ids_done, ids_left):
    """Return the projected hours left, assuming a constant scan rate.

    Args:
        elapsed: Seconds spent so far.
        ids_done: Number of alias ids already scanned.
        ids_left: Number of alias ids still to scan.

    Returns:
        ``0.0`` when nothing is left or no rate can be computed yet,
        otherwise ``elapsed / ids_done * ids_left`` converted to hours.
    """
    if ids_done <= 0 or ids_left <= 0:
        return 0.0
    return elapsed / ids_done * ids_left / 3600.0


def main(argv=None):
    """Run the backfill over the alias id range given on the command line."""
    args = parse_args(argv)

    # Imported after argument parsing so that --help, argument errors and the
    # pure helpers above do not need the application package to be importable.
    from sqlalchemy import func
    from app.models import Alias
    from app.db import Session

    alias_id_start = args.start_alias_id
    max_alias_id = args.end_alias_id
    if max_alias_id == 0:
        # No explicit end: go up to the highest alias id currently stored.
        max_alias_id = Session.query(func.max(Alias.id)).scalar()
    if max_alias_id is None:
        # MAX() over an empty table yields NULL; range() would fail on None.
        print("No alias found, nothing to backfill")
        return

    print(f"Checking alias {alias_id_start} to {max_alias_id}")
    updated = 0
    start_time = time.time()
    for batch_start, batch_end in batch_bounds(alias_id_start, max_alias_id, step):
        rows = Session.execute(el_query, {"start": batch_start, "end": batch_end})
        for row in rows:
            # row[0] is the alias id, row[1] its highest email_log id.
            Session.execute(alias_query, {"alias_id": row[0], "el_id": row[1]})
            # NOTE(review): per-row commit is reconstructed from the statement
            # order of the original; confirm it was not meant to be per batch.
            Session.commit()
            updated += 1
        elapsed = time.time() - start_time
        # Rate is measured in scanned ids (not updated rows) so that it uses
        # the same unit as the number of ids still to scan.
        hours_remaining = estimate_hours_remaining(
            elapsed, batch_end - alias_id_start, max_alias_id + 1 - batch_end
        )
        print(
            f"\rAlias {batch_start}/{max_alias_id} {updated} {hours_remaining:.2f}hrs remaining"
        )
    print("")


if __name__ == "__main__":
    main()