From 847d892e18849c9f58136f180a72eadac509cf18 Mon Sep 17 00:00:00 2001 From: Diego Caraballo Date: Wed, 13 Jul 2022 11:19:21 -0300 Subject: [PATCH] Save emails in .csv --- EmailExtractor.py | 72 +++++++++++++++++++++++++++++++++++++--------- README.md | 23 +++++++++------ requirements.txt | Bin 88 -> 188 bytes 3 files changed, 72 insertions(+), 23 deletions(-) diff --git a/EmailExtractor.py b/EmailExtractor.py index da7e09b..5dcdfd4 100644 --- a/EmailExtractor.py +++ b/EmailExtractor.py @@ -19,9 +19,11 @@ import re from fake_useragent import UserAgent from socket import timeout from urllib.error import HTTPError, URLError +from datetime import datetime +import csv -imageExt = ["jpeg", "jpg", "exif", "tif", "tiff", "gif", "bmp", "png", "ppm", - "pgm", "pbm", "pnm", "webp", "hdr", "heif", "bat", "bpg", "cgm", "svg"] +imageExt = (".jpeg", ".jpg", ".exif", ".tif", ".tiff", ".gif", ".bmp", ".png", ".ppm", + ".pgm", ".pbm", ".pnm", ".webp", ".hdr", ".heif", ".bat", ".bpg", ".cgm", ".svg") ua = UserAgent() count_email_in_phrase = 0 @@ -52,8 +54,9 @@ def menu(): print("4 - Same as option 3 but with a list of keywords") print("5 - List emails - Listar correos") print("6 - Save emails in .txt file - Guardar correos en archivo .txt") - print("7 - Delete Emails from Data Base") - print("8 - Exit - Salir") + print("7 - Save emails in .csv file - Guardar correos en archivo .csv") + print("8 - Delete Emails from Data Base") + print("9 - Exit - Salir") print("") opcion = input("Enter option - Ingrese Opcion: ") @@ -136,6 +139,9 @@ def menu(): menu() elif (opcion == "7"): + guardarCsv("Emails.db") + + elif (opcion == "8"): print("") print("1 - Delete emails from a especific URL") print("2 - Delete emails from a especific phrase") @@ -159,7 +165,7 @@ def menu(): time.sleep(2) menu() - elif (opcion == "8"): + elif (opcion == "9"): sys.exit(0) else: @@ -680,15 +686,14 @@ def extractOnlyUrl(url): contentType = conn.info().get_content_type() if(status != 200 or contentType == "audio/mpeg"): - raise ValueError('Bad Url...') - + raise ValueError('Bad Url...') html = conn.read().decode('utf-8') emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}', html) for email in emails: - if (email not in listUrl and email[-3:] not in imageExt): + if (email not in listUrl and not email.endswith(imageExt)): count += 1 print(str(count) + " - " + email) listUrl.append(email) @@ -736,7 +741,7 @@ def extractUrl(url): contentType = conn.info().get_content_type() if(status != 200 or contentType == "audio/mpeg"): - raise ValueError('Bad Url...') + raise ValueError('Bad Url...') html = conn.read().decode('utf-8') @@ -744,7 +749,7 @@ def extractUrl(url): print ("Searching in " + url) for email in emails: - if (email not in listUrl and email[-3:] not in imageExt): + if (email not in listUrl and not email.endswith(imageExt)): count += 1 print(str(count) + " - " + email) listUrl.append(email) @@ -794,7 +799,7 @@ def extractUrl(url): emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s) for email in emails: - if (email not in listUrl and email[-3:] not in imageExt): + if (email not in listUrl and not email.endswith(imageExt)): count += 1 print(str(count) + " - " + email) listUrl.append(email) @@ -909,8 +914,6 @@ def extractKeywordsList(txtFile): for key in keywordList: print(key) - - # Limpia la pantalla según el sistema operativo def clear(): try: @@ -934,7 +937,7 @@ def searchSpecificLink(link, listEmails, frase): s = f.read().decode('utf-8') emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s) for email in emails: - if (email not in listEmails and email[-3:] not in imageExt): + if (email not in listEmails and not email.endswith(imageExt)): count_email_in_phrase += 1 listEmails.append(email) print(str(count_email_in_phrase) + " - " + email) @@ -955,6 +958,47 @@ def searchSpecificLink(link, listEmails, frase): print(e) pass +def guardarCsv(db_file): + try: + conn = sqlite3.connect(db_file) + c = conn.cursor() + + nameFile = datetime.now().strftime('csvemails_%Y_%m_%d_%H_%M_%S.csv') + print("") + print("Creating csv, please wait...") + + f = open(nameFile, "w", newline="") + writer = csv.writer(f) + + header = ['Phrase', 'Email', 'Url'] + writer.writerow(header) + + c.execute('SELECT * FROM emails') + + for i in c: + row = [str(i[1]), str(i[2]), str(i[3])] + writer.writerow(row) + + f.close() + + conn.close() + + input("Press enter to continue") + menu() + + except Error as e: + print(e) + input("Press enter to continue") + menu() + + except Exception as o: + print(o) + input("Press enter to continue") + menu() + + finally: + conn.close() + # Inicio de Programa def Main(): clear() diff --git a/README.md b/README.md index 7432ecd..420eb6e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@

+# Add Feature: 13-07-2022 +- You can save the mailing list in a .csv file + # Fix: 13-09-2019 - Fix - The script was pasted when searching for phrases on Google. - Add Requirements - pip install -r requirements.txt @@ -12,9 +15,12 @@ - (1) Extract emails from a single URL - (2) Extract emails from a URL (Two Levels) - Search on the page and all its URLs - (3) Do a Google search, save the Urls found and search the emails -- (4) You can list the saved emails -- (5) You can save the mailing list in a .txt file -- (6) Delete Emails from data base +- (4) Same as option 3 but with a list of keywords (TODO) +- (5) You can list the saved emails +- (6) You can save the mailing list in a .txt file +- (7) You can save the mailing list in a .csv file +- (8) Delete Emails from data base +- (9) Exit - The emails are stored in a Sqlite database ("Emails.db") @@ -22,9 +28,12 @@ - (1) Extraer los correos de una única URL - (2) Extraer los correos de una Url (Dos Niveles) - Busca sobre la página y todas sus URL - (3) Hacer una busqueda en Google, guardar las Urls encontradas y buscar los correos en dichas Urls -- (4) Los correos son guardados en una base de datos Sqlite -- (5) Se pueden listar los correos guardados +- (4) Igual que la opción 3 pero con una lista de palabras (TODO) +- (5) Listar correos guardados - (6) Se pueden guardar los correo en un archivo .txt +- (7) Se pueden guardar los correo en un archivo .csv +- (8) Eliminar correos de la base de datos +- (9) Salir - Todos los correos son guardados en una base de datos Sqlite ("Emails.db") @@ -32,12 +41,8 @@ # Required modules - Modulos necesarios -**Update 13-09-2019** - pip install -r requirements.txt -[Software para Empresas](https://www.softtero.com) - [Extraer correos de paginas web con Python](http://www.pythondiario.com/2018/04/extraer-correos-electronicos-de-paginas.html) ## Docker diff --git a/requirements.txt b/requirements.txt index 20dca2f9ba13abd6476afb6fccf1187fce7de107..1e872d832cbdeb369d382b1d71487b4561461bf1 100644 GIT binary patch literal 188 zcmXwzK?=e^5CrQi_>_diE zM$g{rsoB~MYgQ5iXC0M9R`0$i^{PMUMzSs47Jf2omPj(O-;|_UjkLs@i*xyBSDdj~ QZpdCW^7^|YIhd}ve=0^DH~;_u literal 88 zcmWm5F%Ezr3_#Jncd?24MyT=l%KCraCeeCHrmfl)m;)~8AjMrKYjVdd gXJ>XuNCdgotitTkp$B~r9biR*p<9vXpY(?rKKW7_`Tzg`