From 847d892e18849c9f58136f180a72eadac509cf18 Mon Sep 17 00:00:00 2001
From: Diego Caraballo
Date: Wed, 13 Jul 2022 11:19:21 -0300
Subject: [PATCH] Save emails in .csv
---
EmailExtractor.py | 72 +++++++++++++++++++++++++++++++++++++---------
README.md | 23 +++++++++------
requirements.txt | Bin 88 -> 188 bytes
3 files changed, 72 insertions(+), 23 deletions(-)
diff --git a/EmailExtractor.py b/EmailExtractor.py
index da7e09b..5dcdfd4 100644
--- a/EmailExtractor.py
+++ b/EmailExtractor.py
@@ -19,9 +19,11 @@ import re
from fake_useragent import UserAgent
from socket import timeout
from urllib.error import HTTPError, URLError
+from datetime import datetime
+import csv
-imageExt = ["jpeg", "jpg", "exif", "tif", "tiff", "gif", "bmp", "png", "ppm",
- "pgm", "pbm", "pnm", "webp", "hdr", "heif", "bat", "bpg", "cgm", "svg"]
+imageExt = (".jpeg", ".jpg", ".exif", ".tif", ".tiff", ".gif", ".bmp", ".png", ".ppm",
+ ".pgm", ".pbm", ".pnm", ".webp", ".hdr", ".heif", ".bat", ".bpg", ".cgm", ".svg")
ua = UserAgent()
count_email_in_phrase = 0
@@ -52,8 +54,9 @@ def menu():
print("4 - Same as option 3 but with a list of keywords")
print("5 - List emails - Listar correos")
print("6 - Save emails in .txt file - Guardar correos en archivo .txt")
- print("7 - Delete Emails from Data Base")
- print("8 - Exit - Salir")
+ print("7 - Save emails in .csv file - Guardar correos en archivo .csv")
+ print("8 - Delete Emails from Data Base")
+ print("9 - Exit - Salir")
print("")
opcion = input("Enter option - Ingrese Opcion: ")
@@ -136,6 +139,9 @@ def menu():
menu()
elif (opcion == "7"):
+ guardarCsv("Emails.db")
+
+ elif (opcion == "8"):
print("")
print("1 - Delete emails from a especific URL")
print("2 - Delete emails from a especific phrase")
@@ -159,7 +165,7 @@ def menu():
time.sleep(2)
menu()
- elif (opcion == "8"):
+ elif (opcion == "9"):
sys.exit(0)
else:
@@ -680,15 +686,14 @@ def extractOnlyUrl(url):
contentType = conn.info().get_content_type()
if(status != 200 or contentType == "audio/mpeg"):
- raise ValueError('Bad Url...')
-
+ raise ValueError('Bad Url...')
html = conn.read().decode('utf-8')
emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}', html)
for email in emails:
- if (email not in listUrl and email[-3:] not in imageExt):
+ if (email not in listUrl and not email.endswith(imageExt)):
count += 1
print(str(count) + " - " + email)
listUrl.append(email)
@@ -736,7 +741,7 @@ def extractUrl(url):
contentType = conn.info().get_content_type()
if(status != 200 or contentType == "audio/mpeg"):
- raise ValueError('Bad Url...')
+ raise ValueError('Bad Url...')
html = conn.read().decode('utf-8')
@@ -744,7 +749,7 @@ def extractUrl(url):
print ("Searching in " + url)
for email in emails:
- if (email not in listUrl and email[-3:] not in imageExt):
+ if (email not in listUrl and not email.endswith(imageExt)):
count += 1
print(str(count) + " - " + email)
listUrl.append(email)
@@ -794,7 +799,7 @@ def extractUrl(url):
emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
for email in emails:
- if (email not in listUrl and email[-3:] not in imageExt):
+ if (email not in listUrl and not email.endswith(imageExt)):
count += 1
print(str(count) + " - " + email)
listUrl.append(email)
@@ -909,8 +914,6 @@ def extractKeywordsList(txtFile):
for key in keywordList:
print(key)
-
-
# Limpia la pantalla según el sistema operativo
def clear():
try:
@@ -934,7 +937,7 @@ def searchSpecificLink(link, listEmails, frase):
s = f.read().decode('utf-8')
emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
for email in emails:
- if (email not in listEmails and email[-3:] not in imageExt):
+ if (email not in listEmails and not email.endswith(imageExt)):
count_email_in_phrase += 1
listEmails.append(email)
print(str(count_email_in_phrase) + " - " + email)
@@ -955,6 +958,47 @@ def searchSpecificLink(link, listEmails, frase):
print(e)
pass
+def guardarCsv(db_file):
+    """Export the ``emails`` table of a SQLite database to a timestamped CSV.
+
+    Writes ``csvemails_<YYYY_mm_dd_HH_MM_SS>.csv`` (a relative path, so it is
+    created in the current working directory) with the header Phrase, Email,
+    Url and one line per row. Errors raised while exporting are printed
+    instead of propagated; control then returns to ``menu()``.
+
+    :param db_file: path of the SQLite database file to read.
+    """
+    # Pre-bind so the cleanup in ``finally`` is safe even when connect() itself
+    # fails and never assigns the name.
+    conn = None
+    try:
+        conn = sqlite3.connect(db_file)
+        c = conn.cursor()
+        nameFile = datetime.now().strftime('csvemails_%Y_%m_%d_%H_%M_%S.csv')
+        print("")
+        print("Creating csv, please wait...")
+
+        # Query before creating the file so a failing SELECT does not leave a
+        # header-only CSV behind.
+        c.execute('SELECT * FROM emails')
+        # ``with`` closes the file even if writing a row raises.
+        with open(nameFile, "w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerow(['Phrase', 'Email', 'Url'])
+            for i in c:
+                # Columns 1-3 go under the header above; column 0 is skipped.
+                writer.writerow([str(i[1]), str(i[2]), str(i[3])])
+    except Exception as e:
+        # sqlite3 errors derive from Exception, so one handler covers them too.
+        print(e)
+    finally:
+        if conn is not None:
+            conn.close()
+    # Called after cleanup so the database handle is not held open while the
+    # menu runs, and errors raised inside menu() are not swallowed here.
+    input("Press enter to continue")
+    menu()
+
# Inicio de Programa
def Main():
clear()
diff --git a/README.md b/README.md
index 7432ecd..420eb6e 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,9 @@
+# Add Feature: 13-07-2022
+- You can save the mailing list in a .csv file
+
# Fix: 13-09-2019
- Fix - The script was pasted when searching for phrases on Google.
- Add Requirements - pip install -r requirements.txt
@@ -12,9 +15,12 @@
- (1) Extract emails from a single URL
- (2) Extract emails from a URL (Two Levels) - Search on the page and all its URLs
- (3) Do a Google search, save the Urls found and search the emails
-- (4) You can list the saved emails
-- (5) You can save the mailing list in a .txt file
-- (6) Delete Emails from data base
+- (4) Same as option 3 but with a list of keywords (TODO)
+- (5) You can list the saved emails
+- (6) You can save the mailing list in a .txt file
+- (7) You can save the mailing list in a .csv file
+- (8) Delete emails from the database
+- (9) Exit
- The emails are stored in a Sqlite database ("Emails.db")
@@ -22,9 +28,12 @@
- (1) Extraer los correos de una única URL
- (2) Extraer los correos de una Url (Dos Niveles) - Busca sobre la página y todas sus URL
- (3) Hacer una busqueda en Google, guardar las Urls encontradas y buscar los correos en dichas Urls
-- (4) Los correos son guardados en una base de datos Sqlite
-- (5) Se pueden listar los correos guardados
+- (4) Igual que la opción 3 pero con una lista de palabras (TODO)
+- (5) Listar correos guardados
- (6) Se pueden guardar los correo en un archivo .txt
+- (7) Se pueden guardar los correos en un archivo .csv
+- (8) Eliminar correos de la base de datos
+- (9) Salir
- Todos los correos son guardados en una base de datos Sqlite ("Emails.db")
@@ -32,12 +41,8 @@
# Required modules - Modulos necesarios
-**Update 13-09-2019**
-
pip install -r requirements.txt
-[Software para Empresas](https://www.softtero.com)
-
[Extraer correos de paginas web con Python](http://www.pythondiario.com/2018/04/extraer-correos-electronicos-de-paginas.html)
## Docker
diff --git a/requirements.txt b/requirements.txt
index 20dca2f9ba13abd6476afb6fccf1187fce7de107..1e872d832cbdeb369d382b1d71487b4561461bf1 100644
GIT binary patch
literal 188
zcmXwzK?=e^5CrQi_>_diE
zM$g{rsoB~MYgQ5iXC0M9R`0$i^{PMUMzSs47Jf2omPj(O-;|_UjkLs@i*xyBSDdj~
QZpdCW^7^|YIhd}ve=0^DH~;_u
literal 88
zcmWm5F%Ezr3_#Jncd?24MyT=l%KCraCeeCHrmfl)m;)~8AjMrKYjVdd
gXJ>XuNCdgotitTkp$B~r9biR*p<9vXpY(?rKKW7_`Tzg`