mirror of
https://github.com/DiegoCaraballo/Email-extractor.git
synced 2024-09-20 06:46:01 +08:00
Save emails in .csv
This commit is contained in:
parent
16d7e9c917
commit
847d892e18
|
@ -19,9 +19,11 @@ import re
|
|||
from fake_useragent import UserAgent
|
||||
from socket import timeout
|
||||
from urllib.error import HTTPError, URLError
|
||||
from datetime import datetime
|
||||
import csv
|
||||
|
||||
imageExt = ["jpeg", "jpg", "exif", "tif", "tiff", "gif", "bmp", "png", "ppm",
|
||||
"pgm", "pbm", "pnm", "webp", "hdr", "heif", "bat", "bpg", "cgm", "svg"]
|
||||
imageExt = (".jpeg", ".jpg", ".exif", ".tif", ".tiff", ".gif", ".bmp", ".png", ".ppm",
|
||||
".pgm", ".pbm", ".pnm", ".webp", ".hdr", ".heif", ".bat", ".bpg", ".cgm", ".svg")
|
||||
ua = UserAgent()
|
||||
|
||||
count_email_in_phrase = 0
|
||||
|
@ -52,8 +54,9 @@ def menu():
|
|||
print("4 - Same as option 3 but with a list of keywords")
|
||||
print("5 - List emails - Listar correos")
|
||||
print("6 - Save emails in .txt file - Guardar correos en archivo .txt")
|
||||
print("7 - Delete Emails from Data Base")
|
||||
print("8 - Exit - Salir")
|
||||
print("7 - Save emails in .csv file - Guardar correos en archivo .csv")
|
||||
print("8 - Delete Emails from Data Base")
|
||||
print("9 - Exit - Salir")
|
||||
print("")
|
||||
|
||||
opcion = input("Enter option - Ingrese Opcion: ")
|
||||
|
@ -136,6 +139,9 @@ def menu():
|
|||
menu()
|
||||
|
||||
elif (opcion == "7"):
|
||||
guardarCsv("Emails.db")
|
||||
|
||||
elif (opcion == "8"):
|
||||
print("")
|
||||
print("1 - Delete emails from a especific URL")
|
||||
print("2 - Delete emails from a especific phrase")
|
||||
|
@ -159,7 +165,7 @@ def menu():
|
|||
time.sleep(2)
|
||||
menu()
|
||||
|
||||
elif (opcion == "8"):
|
||||
elif (opcion == "9"):
|
||||
sys.exit(0)
|
||||
|
||||
else:
|
||||
|
@ -682,13 +688,12 @@ def extractOnlyUrl(url):
|
|||
if(status != 200 or contentType == "audio/mpeg"):
|
||||
raise ValueError('Bad Url...')
|
||||
|
||||
|
||||
html = conn.read().decode('utf-8')
|
||||
|
||||
emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}', html)
|
||||
|
||||
for email in emails:
|
||||
if (email not in listUrl and email[-3:] not in imageExt):
|
||||
if (email not in listUrl and not email.endswith(imageExt)):
|
||||
count += 1
|
||||
print(str(count) + " - " + email)
|
||||
listUrl.append(email)
|
||||
|
@ -744,7 +749,7 @@ def extractUrl(url):
|
|||
print ("Searching in " + url)
|
||||
|
||||
for email in emails:
|
||||
if (email not in listUrl and email[-3:] not in imageExt):
|
||||
if (email not in listUrl and not email.endswith(imageExt)):
|
||||
count += 1
|
||||
print(str(count) + " - " + email)
|
||||
listUrl.append(email)
|
||||
|
@ -794,7 +799,7 @@ def extractUrl(url):
|
|||
emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
|
||||
|
||||
for email in emails:
|
||||
if (email not in listUrl and email[-3:] not in imageExt):
|
||||
if (email not in listUrl and not email.endswith(imageExt)):
|
||||
count += 1
|
||||
print(str(count) + " - " + email)
|
||||
listUrl.append(email)
|
||||
|
@ -909,8 +914,6 @@ def extractKeywordsList(txtFile):
|
|||
for key in keywordList:
|
||||
print(key)
|
||||
|
||||
|
||||
|
||||
# Limpia la pantalla según el sistema operativo
|
||||
def clear():
|
||||
try:
|
||||
|
@ -934,7 +937,7 @@ def searchSpecificLink(link, listEmails, frase):
|
|||
s = f.read().decode('utf-8')
|
||||
emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
|
||||
for email in emails:
|
||||
if (email not in listEmails and email[-3:] not in imageExt):
|
||||
if (email not in listEmails and not email.endswith(imageExt)):
|
||||
count_email_in_phrase += 1
|
||||
listEmails.append(email)
|
||||
print(str(count_email_in_phrase) + " - " + email)
|
||||
|
@ -955,6 +958,47 @@ def searchSpecificLink(link, listEmails, frase):
|
|||
print(e)
|
||||
pass
|
||||
|
||||
def guardarCsv(db_file):
    """Export the rows of the ``emails`` table to a timestamped .csv file.

    The file is created in the current working directory and is named
    ``csvemails_<YYYY>_<MM>_<DD>_<HH>_<MM>_<SS>.csv``. It starts with the
    header ``Phrase, Email, Url`` followed by one line per database row.

    Any error raised while exporting is printed rather than propagated,
    so the user is always taken back to the menu afterwards.

    Args:
        db_file: Path of the SQLite database file to read from.
    """
    # Bound before the try so the finally clause can tell whether the
    # connection was ever opened (sqlite3.connect itself may raise).
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()

        nameFile = datetime.now().strftime('csvemails_%Y_%m_%d_%H_%M_%S.csv')
        print("")
        print("Creating csv, please wait...")

        # newline="" lets the csv module control line endings itself; the
        # with-block guarantees the file is closed even if a row fails.
        with open(nameFile, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(['Phrase', 'Email', 'Url'])

            c.execute('SELECT * FROM emails')
            for i in c:
                # Column 0 is skipped (presumably the row id - verify
                # against the table schema); columns 1-3 are exported.
                writer.writerow([str(i[1]), str(i[2]), str(i[3])])

    except Exception as e:
        # Menu-level boundary: report the problem (database, file system
        # or row-shape errors) and fall through to the prompt below.
        print(e)

    finally:
        if conn is not None:
            conn.close()

    # Prompt and return to the menu only after all resources are released,
    # and outside the try so that errors raised by the rest of the program
    # are not swallowed by this function's handler.
    input("Press enter to continue")
    menu()
|
||||
|
||||
# Inicio de Programa
|
||||
def Main():
|
||||
clear()
|
||||
|
|
23
README.md
23
README.md
|
@ -2,6 +2,9 @@
|
|||
<img width="560" height="400" src="https://github.com/DiegoCaraballo/Email-extractor/blob/master/EmailExtractor.PNG">
|
||||
</p>
|
||||
|
||||
# Add Feature: 13-07-2022
|
||||
- You can save the mailing list in a .csv file
|
||||
|
||||
# Fix: 13-09-2019
|
||||
- Fix - The script got stuck (hung) when searching for phrases on Google.
|
||||
- Add Requirements - pip install -r requirements.txt
|
||||
|
@ -12,9 +15,12 @@
|
|||
- (1) Extract emails from a single URL
|
||||
- (2) Extract emails from a URL (Two Levels) - Search on the page and all its URLs
|
||||
- (3) Do a Google search, save the URLs found and search them for emails
|
||||
- (4) You can list the saved emails
|
||||
- (5) You can save the mailing list in a .txt file
|
||||
- (6) Delete Emails from data base
|
||||
- (4) Same as option 3 but with a list of keywords (TODO)
|
||||
- (5) You can list the saved emails
|
||||
- (6) You can save the mailing list in a .txt file
|
||||
- (7) You can save the mailing list in a .csv file
|
||||
- (8) Delete Emails from data base
|
||||
- (9) Exit
|
||||
|
||||
- The emails are stored in a Sqlite database ("Emails.db")
|
||||
|
||||
|
@ -22,9 +28,12 @@
|
|||
- (1) Extraer los correos de una única URL
|
||||
- (2) Extraer los correos de una Url (Dos Niveles) - Busca sobre la página y todas sus URL
|
||||
- (3) Hacer una búsqueda en Google, guardar las Urls encontradas y buscar los correos en dichas Urls
|
||||
- (4) Los correos son guardados en una base de datos Sqlite
|
||||
- (5) Se pueden listar los correos guardados
|
||||
- (4) Igual que la opción 3 pero con una lista de palabras (TODO)
|
||||
- (5) Listar correos guardados
|
||||
- (6) Se pueden guardar los correos en un archivo .txt
|
||||
- (7) Se pueden guardar los correos en un archivo .csv
|
||||
- (8) Eliminar correos de la base de datos
|
||||
- (9) Salir
|
||||
|
||||
- Todos los correos son guardados en una base de datos Sqlite ("Emails.db")
|
||||
|
||||
|
@ -32,12 +41,8 @@
|
|||
|
||||
# Required modules - Modulos necesarios
|
||||
|
||||
**Update 13-09-2019**
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
[Software para Empresas](https://www.softtero.com)
|
||||
|
||||
[Extraer correos de paginas web con Python](http://www.pythondiario.com/2018/04/extraer-correos-electronicos-de-paginas.html)
|
||||
|
||||
## Docker
|
||||
|
|
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Loading…
Reference in a new issue