Save emails in .csv

2024-09-20 06:46:01 +08:00 · 2022-07-13 11:19:21 -03:00 · 2022-07-13 11:19:21 -03:00 · 847d892e18
parent 16d7e9c917
commit 847d892e18
3 changed files with 72 additions and 23 deletions
--- a/EmailExtractor.py
+++ b/EmailExtractor.py
@ -19,9 +19,11 @@ import re
 from fake_useragent import UserAgent
 from socket import timeout
 from urllib.error import HTTPError, URLError
+from datetime import datetime
+import csv

-imageExt = ["jpeg", "jpg", "exif", "tif", "tiff", "gif", "bmp", "png", "ppm",
-			"pgm", "pbm", "pnm", "webp", "hdr", "heif", "bat", "bpg", "cgm", "svg"]
+imageExt = (".jpeg", ".jpg", ".exif", ".tif", ".tiff", ".gif", ".bmp", ".png", ".ppm",
+			".pgm", ".pbm", ".pnm", ".webp", ".hdr", ".heif", ".bat", ".bpg", ".cgm", ".svg")
 ua = UserAgent()

 count_email_in_phrase = 0
@ -52,8 +54,9 @@ def menu():
 		print("4 - Same as option 3 but with a list of keywords")
 		print("5 - List emails - Listar correos")
 		print("6 - Save emails in .txt file - Guardar correos en archivo .txt")
-		print("7 - Delete Emails from Data Base")
-		print("8 - Exit - Salir")
+		print("7 - Save emails in .csv file - Guardar correos en archivo .csv")
+		print("8 - Delete Emails from Data Base")
+		print("9 - Exit - Salir")
 		print("")

 		opcion = input("Enter option - Ingrese Opcion: ")
@ -136,6 +139,9 @@ def menu():
 				menu()

 		elif (opcion == "7"):
+			guardarCsv("Emails.db")
+
+		elif (opcion == "8"):
 			print("")
 			print("1 - Delete emails from a especific URL")
 			print("2 - Delete emails from a especific phrase")
@ -159,7 +165,7 @@ def menu():
 				time.sleep(2)
 				menu()
 		
-		elif (opcion == "8"):
+		elif (opcion == "9"):
 			sys.exit(0)

 		else:			
@ -682,13 +688,12 @@ def extractOnlyUrl(url):
 		if(status != 200 or contentType == "audio/mpeg"):
 			raise ValueError('Bad Url...')

-
 		html = conn.read().decode('utf-8')

 		emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}', html)

 		for email in emails:
-			if (email not in listUrl and email[-3:] not in imageExt):
+			if (email not in listUrl and not email.endswith(imageExt)):
 				count += 1
 				print(str(count) + " - " + email)
 				listUrl.append(email)
@ -744,7 +749,7 @@ def extractUrl(url):
 		print ("Searching in " + url)
 		
 		for email in emails:
-			if (email not in listUrl and email[-3:] not in imageExt):
+			if (email not in listUrl and not email.endswith(imageExt)):
 					count += 1
 					print(str(count) + " - " + email)
 					listUrl.append(email)
@ -794,7 +799,7 @@ def extractUrl(url):
 						emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)

 						for email in emails:
-							if (email not in listUrl and email[-3:] not in imageExt):
+							if (email not in listUrl and not email.endswith(imageExt)):
 								count += 1
 								print(str(count) + " - " + email)
 								listUrl.append(email)
@ -909,8 +914,6 @@ def extractKeywordsList(txtFile):
 	for key in keywordList:
    		print(key)

-
-
 # Limpia la pantalla según el sistema operativo
 def clear():
 	try:
@ -934,7 +937,7 @@ def searchSpecificLink(link, listEmails, frase):
 			s = f.read().decode('utf-8')
 			emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
 			for email in emails:
-				if (email not in listEmails and email[-3:] not in imageExt):
+				if (email not in listEmails and not email.endswith(imageExt)):
 					count_email_in_phrase += 1
 					listEmails.append(email)
 					print(str(count_email_in_phrase) + " - " + email)										
@ -955,6 +958,47 @@ def searchSpecificLink(link, listEmails, frase):
 		print(e)
 		pass

+def guardarCsv(db_file):
+	"""Export every row of the ``emails`` table to a timestamped CSV file.
+
+	Columns 1-3 of each row are written under the header Phrase, Email, Url.
+	Errors are printed instead of raised; control then returns to ``menu()``.
+
+	:param db_file: path of the SQLite database file to read
+	"""
+	conn = None  # lets ``finally`` tell whether connect() ever succeeded
+	try:
+		conn = sqlite3.connect(db_file)
+		c = conn.cursor()
+
+		nameFile = datetime.now().strftime('csvemails_%Y_%m_%d_%H_%M_%S.csv')
+		print("")
+		print("Creating csv, please wait...")
+
+		# ``with`` closes the file even when a query or write fails midway.
+		with open(nameFile, "w", newline="") as f:
+			writer = csv.writer(f)
+			writer.writerow(['Phrase', 'Email', 'Url'])
+
+			c.execute('SELECT * FROM emails')
+			for i in c:
+				# Column 0 is not exported (presumably the row id -- confirm schema).
+				writer.writerow([str(i[1]), str(i[2]), str(i[3])])
+
+	except Exception as e:
+		# sqlite3 errors derive from Exception, so one handler covers them too.
+		print(e)
+
+	finally:
+		# Close exactly once, and only if the connection was actually opened.
+		if conn is not None:
+			conn.close()
+
+	# Outside the try block, so errors raised later by the menu are not
+	# swallowed by this function's handler.
+	input("Press enter to continue")
+	menu()
+
 # Inicio de Programa
 def Main():
 	clear()
--- a/README.md
+++ b/README.md
@ -2,6 +2,9 @@
  <img width="560" height="400" src="https://github.com/DiegoCaraballo/Email-extractor/blob/master/EmailExtractor.PNG">
 </p>

+# Add Feature: 13-07-2022
+- You can save the mailing list in a .csv file
+
 # Fix: 13-09-2019
 - Fix - The script was pasted when searching for phrases on Google.
 - Add Requirements - pip install -r requirements.txt
@ -12,9 +15,12 @@
 - (1) Extract emails from a single URL
 - (2) Extract emails from a URL (Two Levels) - Search on the page and all its URLs
 - (3) Do a Google search, save the Urls found and search the emails
- (4) You can list the saved emails
- (5) You can save the mailing list in a .txt file
- (6) Delete Emails from data base
+- (4) Same as option 3 but with a list of keywords (TODO)
+- (5) You can list the saved emails
+- (6) You can save the mailing list in a .txt file
+- (7) You can save the mailing list in a .csv file
+- (8) Delete emails from the database
+- (9) Exit

 - The emails are stored in a Sqlite database ("Emails.db")

@ -22,9 +28,12 @@
 - (1) Extraer los correos de una única URL
 - (2) Extraer los correos de una Url (Dos Niveles) - Busca sobre la página y todas sus URL
 - (3) Hacer una busqueda en Google, guardar las Urls encontradas y buscar los correos en dichas Urls
- (4) Los correos son guardados en una base de datos Sqlite
- (5) Se pueden listar los correos guardados
+- (4) Igual que la opción 3 pero con una lista de palabras (TODO)
+- (5) Listar correos guardados
 - (6) Se pueden guardar los correo en un archivo .txt
+- (7) Se pueden guardar los correos en un archivo .csv
+- (8) Eliminar correos de la base de datos
+- (9) Salir

 - Todos los correos son guardados en una base de datos Sqlite ("Emails.db")

@ -32,12 +41,8 @@

 # Required modules - Modulos necesarios

-**Update 13-09-2019**
-
 pip install -r requirements.txt

-[Software para Empresas](https://www.softtero.com)
-
 [Extraer correos de paginas web con Python](http://www.pythondiario.com/2018/04/extraer-correos-electronicos-de-paginas.html)

 ## Docker
--- a/requirements.txt
+++ b/requirements.txt