2018-02-08 02:13:19 +08:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
2018-02-09 03:53:13 +08:00
|
|
|
# Comentarios en Español
|
|
|
|
# Blog: www.pythondiario.com
|
2018-02-08 02:13:19 +08:00
|
|
|
|
2018-02-08 22:29:19 +08:00
|
|
|
from googlesearch import search
|
2019-09-14 05:57:55 +08:00
|
|
|
from socket import timeout
|
|
|
|
import http
|
2018-02-09 06:20:27 +08:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import urllib.request
|
2019-09-14 05:57:55 +08:00
|
|
|
from urllib.request import URLError, HTTPError
|
2018-02-08 02:13:19 +08:00
|
|
|
import random
|
|
|
|
import os
|
|
|
|
import time
|
2018-02-08 11:15:41 +08:00
|
|
|
import sqlite3
|
|
|
|
from sqlite3 import Error
|
2018-02-09 03:53:13 +08:00
|
|
|
import sys
|
2018-02-09 08:28:13 +08:00
|
|
|
import re
|
2019-01-22 07:58:24 +08:00
|
|
|
from fake_useragent import UserAgent
|
2019-01-30 08:35:00 +08:00
|
|
|
from socket import timeout
|
|
|
|
from urllib.error import HTTPError, URLError
|
2022-07-13 22:19:21 +08:00
|
|
|
from datetime import datetime
|
|
|
|
import csv
|
2018-02-08 02:13:19 +08:00
|
|
|
|
2022-07-13 22:19:21 +08:00
|
|
|
# File extensions that the e-mail regex can match by accident (for example
# "logo@2x.png"); candidates ending in one of these are discarded by the
# extractors via str.endswith(imageExt).
imageExt = (
    ".jpeg", ".jpg", ".exif", ".tif", ".tiff",
    ".gif", ".bmp", ".png", ".ppm", ".pgm",
    ".pbm", ".pnm", ".webp", ".hdr", ".heif",
    ".bat", ".bpg", ".cgm", ".svg",
)
|
2019-01-22 07:58:24 +08:00
|
|
|
# Shared fake_useragent generator; the extractors read ua.random to fill the
# User-Agent header of each outgoing request.
ua = UserAgent()

# Module-level counter that menu() resets to 0 on every call.
# NOTE(review): presumably incremented by the phrase-search code that is not
# visible in this part of the file - confirm.
count_email_in_phrase = 0
|
|
|
|
|
2018-02-09 01:12:58 +08:00
|
|
|
# Menú Principal
|
2018-02-08 02:13:19 +08:00
|
|
|
def menu():
    """Draw the main menu, read one option and run the matching action.

    Navigation is recursive rather than loop based: once an action finishes,
    either this function or the action itself calls ``menu()`` again.
    Option 9 leaves the program through ``sys.exit(0)``.

    Side effects: resets the module-level ``count_email_in_phrase`` to 0,
    calls ``clear()`` (defined elsewhere in the file; presumably clears the
    terminal), prints to stdout and reads from stdin.
    """
    global count_email_in_phrase
    count_email_in_phrase = 0

    try:
        clear()
        # Raw strings: the banner contains backslashes that are not escape
        # sequences, which newer Python versions warn about in plain strings.
        print(r' .-"""-. ')
        print(r' / .===. \ ')
        print(r' \/ 6 6 \/')
        print(r' ( \___/ )')
        print(r' __________ooo___\_____/______________')
        print(r' / \ ')
        print(' | EMAIL EXTRACTOR |')
        print(r' \__________________________ooo________/')
        print('')
        print(' ------------------------------------------------------------------')
        print("| ENGLISH - ESPAÑOL | ")
        print(" ------------------------------------------------------------------")
        print("1 - Search only in the entered URL - Buscar solo en la URL ingresada")
        print("2 - Search in a url (Two Levels) - Buscar en una URL(Dos Niveles)")
        print("3 - Search phrase in google - Buscar frase en Google")
        print("4 - Same as option 3 but with a list of keywords")
        print("5 - List emails - Listar correos")
        print("6 - Save emails in .txt file - Guardar correos en archivo .txt")
        print("7 - Save emails in .csv file - Guardar correos en archivo .csv")
        print("8 - Delete Emails from Data Base")
        print("9 - Exit - Salir")
        print("")

        opcion = input("Enter option - Ingrese Opcion: ")

        if opcion == "1":
            print("")
            print("Example URL: http://www.pythondiario.com")
            url = str(input("Enter URL - Ingrese URL: "))
            extractOnlyUrl(url)
            input("Press enter key to continue")
            menu()

        # This branch used to be a separate ``if``; after option 1 returned,
        # control fell through to the final ``else`` and reported an invalid
        # option. It is now part of the same if/elif chain.
        elif opcion == "2":
            print("")
            print("Example URL: http://www.pythondiario.com")
            url = str(input("Enter URL - Ingrese URL: "))
            extractUrl(url)
            input("Press enter key to continue")
            menu()

        elif opcion == "3":
            print("")
            frase = str(input("Enter a phrase to search - Ingrese una frase a buscar: "))
            print("***Warning: The amount of results chosen impacts the execution time***")
            print("*** Advertencia: La cantidad de resultados elejidos impacta el tiempo de ejecucion")
            # A non-numeric answer raises ValueError, handled by the generic
            # ``except Exception`` below.
            cantRes = int(input("Number of Google links to explore - Cantiad de resultados en Google: "))
            print("")
            extractFraseGoogle(frase, cantRes)
            input("Press enter key to continue")
            menu()

        elif opcion == "4":
            # Keyword-list search is not implemented yet.
            print("Developing...")
            input("Press enter key to continue")
            menu()

        elif opcion == "5":
            print("")
            print("1 - Select a phrase - Seleccionar una frase")
            print("2 - Insert a URL")
            print("3 - All emails - Todos los correos")
            opcListar = input("Enter option - Ingrese Opcion: ")

            if opcListar == "1":
                listarPorFrase("Emails.db")
            elif opcListar == "2":
                listarPorUrl("Emails.db")
            elif opcListar == "3":
                listarTodo("Emails.db")
            else:
                print("Incorrect option, return to the menu...")
                time.sleep(2)
                menu()

        elif opcion == "6":
            print("")
            print("1 - Save emails from a phrase - Guardar correos de una frase")
            print("2 - Save emails from a URL - Guardar correos de una URL")
            print("3 - Save all emails - Guardar todos los correos")
            opcGuardar = input("Enter Option - Ingrese Opcion: ")

            if opcGuardar == "1":
                frase = str(input("Enter phrase: "))
                guardarFrase("Emails.db", frase)
            elif opcGuardar == "2":
                print("Example URL: http://www.pythondiario.com")
                url = str(input("Insert URL: "))
                guardarUrl("Emails.db", url)
            elif opcGuardar == "3":
                guardarAll("Emails.db")
            else:
                print("Incorrect option, return to the menu...")
                time.sleep(2)
                menu()

        elif opcion == "7":
            guardarCsv("Emails.db")

        elif opcion == "8":
            print("")
            print("1 - Delete emails from a especific URL")
            print("2 - Delete emails from a especific phrase")
            print("3 - Delete all Emails")
            op = input("Enter option: ")

            if op == "1":
                print("Example URL: http://www.pythondiario.com")
                url = str(input("Insert URL: "))
                deleteUrl("Emails.db", url.strip())
            elif op == "2":
                phrase = str(input("Insert Phrase: "))
                deletePhrase("Emails.db", phrase.strip())
            elif op == "3":
                deleteAll("Emails.db")
            else:
                print("Incorrect option, return to the menu...")
                time.sleep(2)
                menu()

        elif opcion == "9":
            # SystemExit is not an Exception subclass, so the handlers below
            # do not intercept it.
            sys.exit(0)

        else:
            print("")
            print("Select a correct option - Seleccione un opcion correcta")
            time.sleep(3)
            clear()
            menu()

    except KeyboardInterrupt:
        # Ctrl+C aborts the running action and returns to the menu.
        input("Press return to continue")
        menu()

    except Exception as e:
        print(e)
        input("Press enter to continue")
        menu()
|
2018-02-08 08:03:48 +08:00
|
|
|
|
2018-02-09 01:12:58 +08:00
|
|
|
# Insertar correo, frase y Url en base de datos
|
|
|
|
def insertEmail(db_file, email, frase, url):
    """Insert one (phrase, email, url) row into the ``emails`` table.

    Args:
        db_file: Path of the SQLite database file.
        email: E-mail address to store.
        frase: Search phrase the address is associated with.
        url: URL the address is associated with.

    On a ``sqlite3.Error`` the error is printed, the user is prompted and
    ``menu()`` is re-entered; nothing is raised to the caller.
    """
    conn = None
    failed = False
    try:
        conn = sqlite3.connect(db_file)
        conn.execute(
            "INSERT INTO emails (phrase, email, url) VALUES (?,?,?)",
            (frase, email, url),
        )
        conn.commit()
    except Error as e:
        print(e)
        failed = True
    finally:
        # ``conn`` stays None when connect() itself failed; guarding here
        # avoids an UnboundLocalError that would hide the real error.
        if conn is not None:
            conn.close()

    # Return to the menu only after the connection has been released.
    if failed:
        input("Press enter to continue")
        menu()
|
2018-02-08 11:15:41 +08:00
|
|
|
|
|
|
|
# Buscar correo en la base de datos
|
2018-02-11 10:15:16 +08:00
|
|
|
def searchEmail(db_file, email, frase):
    """Count stored rows whose email and phrase both contain the given text.

    Both comparisons use SQL ``LIKE`` with ``%`` added on each side, so they
    are substring matches; ``%`` and ``_`` inside the arguments keep acting
    as wildcards.

    Args:
        db_file: Path of the SQLite database file.
        email: Text searched for in the ``email`` column.
        frase: Text searched for in the ``phrase`` column.

    Returns:
        The number of matching rows. On a ``sqlite3.Error`` the error is
        printed, ``menu()`` is re-entered and ``None`` is returned.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        # Bound parameters instead of string concatenation: values containing
        # quotes no longer break the statement or inject SQL.
        row = conn.execute(
            "SELECT COUNT(*) FROM emails WHERE email LIKE ? AND phrase LIKE ?",
            ("%" + str(email) + "%", "%" + str(frase) + "%"),
        ).fetchone()
        return row[0]
    except Error as e:
        print(e)
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    # Only reached after an error: the success path returned above.
    input("Press enter to continue")
    menu()
|
2018-02-08 12:53:25 +08:00
|
|
|
|
|
|
|
# Crea tabla principal
|
2018-02-11 11:16:04 +08:00
|
|
|
def crearTabla(db_file, delete = False):
    """Create the ``emails`` table if it does not exist yet.

    Args:
        db_file: Path of the SQLite database file.
        delete: When true, drop an existing ``emails`` table first, so the
            table is recreated empty.

    On a ``sqlite3.Error`` the error is printed, the user is prompted and
    ``menu()`` is re-entered; nothing is raised to the caller.
    """
    conn = None
    failed = False
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()

        if delete:
            c.execute('drop table if exists emails')

        c.execute('''create table if not exists emails
            (ID INTEGER PRIMARY KEY AUTOINCREMENT,
            phrase varchar(500) NOT NULL,
            email varchar(200) NOT NULL,
            url varchar(500) NOT NULL)''')
        # Explicit commit so the schema change never depends on the
        # driver's implicit transaction handling.
        conn.commit()
    except Error as e:
        print(e)
        failed = True
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    # Return to the menu only after the connection has been released.
    if failed:
        input("Press enter to continue")
        menu()
|
|
|
|
|
2018-02-13 01:30:46 +08:00
|
|
|
# Guardar por URL en un archivo .txt
|
|
|
|
def guardarUrl(db_file, url):
    """Export every stored e-mail whose URL equals *url* to a text file.

    Asks for a file name (".txt" is appended) and writes one numbered record
    per row. Whatever happens, the function finishes by prompting the user
    and re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
        url: URL to match exactly; surrounding whitespace is stripped.
    """
    conn = None
    try:
        target = url.strip()
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a URL containing
        # a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE url = ?", (target,)
        ).fetchone()[0]

        if total == 0:
            # The previous message said "erase", copied from the delete
            # routines; this function saves.
            print("There are no emails to save")
        else:
            nameFile = str(input("Name of the file: "))
            print("")
            print("Save file, please wait...")

            c.execute("SELECT * FROM emails WHERE url = ?", (target,))
            # ``with`` closes the file even when a write fails.
            with open(nameFile.strip() + ".txt", "w") as f:
                # Row layout: (ID, phrase, email, url).
                for count, row in enumerate(c, start=1):
                    f.write("Number: " + str(count) + '\n')
                    f.write("Phrase: " + str(row[1]) + '\n')
                    f.write("Email: " + str(row[2]) + '\n')
                    f.write("Url: " + str(row[3]) + '\n')
                    f.write("-------------------------------------------------------------------------------" + '\n')
    except Exception as e:
        # Covers sqlite3.Error as well as file-system errors; both were
        # handled identically before.
        print(e)
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    input("Press enter to continue")
    menu()
|
|
|
|
|
|
|
|
# Guardar por frase en un archivo .txt
|
|
|
|
def guardarFrase(db_file, frase):
    """Export every stored e-mail whose phrase equals *frase* to a text file.

    Asks for a file name (".txt" is appended) and writes one numbered record
    per row. Whatever happens, the function finishes by prompting the user
    and re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
        frase: Phrase to match exactly; surrounding whitespace is stripped.
    """
    conn = None
    try:
        wanted = frase.strip()
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a phrase
        # containing a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE phrase = ?", (wanted,)
        ).fetchone()[0]

        if total == 0:
            # The previous message said "erase", copied from the delete
            # routines; this function saves.
            print("There are no emails to save")
        else:
            nameFile = str(input("Name of the file: "))
            print("")
            print("Save file, please wait...")

            c.execute("SELECT * FROM emails WHERE phrase = ?", (wanted,))
            # ``with`` closes the file even when a write fails.
            with open(nameFile.strip() + ".txt", "w") as f:
                # Row layout: (ID, phrase, email, url).
                for count, row in enumerate(c, start=1):
                    f.write("Number: " + str(count) + '\n')
                    f.write("Phrase: " + str(row[1]) + '\n')
                    f.write("Email: " + str(row[2]) + '\n')
                    f.write("Url: " + str(row[3]) + '\n')
                    f.write("-------------------------------------------------------------------------------" + '\n')
    except Exception as e:
        # Covers sqlite3.Error as well as file-system errors; both were
        # handled identically before.
        print(e)
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    input("Press enter to continue")
    menu()
|
|
|
|
|
|
|
|
# Guardar todos los correos en un archivo .txt
|
|
|
|
def guardarAll(db_file):
    """Export every stored e-mail to a text file.

    Asks for a file name (".txt" is appended) and writes one numbered record
    per row. Whatever happens, the function finishes by prompting the user
    and re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        total = c.execute('SELECT COUNT(*) FROM emails').fetchone()[0]

        if total == 0:
            print("There are no emails to save")
        else:
            nameFile = str(input("Name of the file: "))
            print("")
            print("Save file, please wait...")

            c.execute('SELECT * FROM emails')
            # The name is stripped like in the other export routines, and
            # ``with`` closes the file even when a write fails.
            with open(nameFile.strip() + ".txt", "w") as f:
                # Row layout: (ID, phrase, email, url).
                for count, row in enumerate(c, start=1):
                    f.write("Number: " + str(count) + '\n')
                    f.write("Phrase: " + str(row[1]) + '\n')
                    f.write("Email: " + str(row[2]) + '\n')
                    f.write("Url: " + str(row[3]) + '\n')
                    f.write("-------------------------------------------------------------------------------" + '\n')
    except Exception as e:
        # Covers sqlite3.Error as well as file-system errors; both were
        # handled identically before.
        print(e)
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    input("Press enter to continue")
    menu()
|
|
|
|
|
2018-02-12 23:16:07 +08:00
|
|
|
# Borra todos los correos de una URL específica
|
|
|
|
def deleteUrl(db_file, url):
    """Delete, after confirmation, every stored e-mail whose URL equals *url*.

    The user is asked Y/N; any other answer repeats the question. Whatever
    happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
        url: URL to match exactly.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a URL containing
        # a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE url = ?", (url,)
        ).fetchone()[0]

        if total == 0:
            print("There are no emails to erase")
            input("Press enter to continue")
        else:
            # A loop replaces the former self-recursion, which opened one
            # more connection for every invalid answer.
            while True:
                option = str(input("Are you sure you want to delete " + str(total) + " emails? Y/N :"))

                if option in ("Y", "y"):
                    c.execute("DELETE FROM emails WHERE url = ?", (url,))
                    conn.commit()
                    print("Emails deleted")
                    input("Press enter to continue")
                    break

                if option in ("N", "n"):
                    print("Canceled operation, return to the menu ...")
                    time.sleep(2)
                    break

                print("Select a correct option")
                time.sleep(2)
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    menu()
|
2018-02-12 23:16:07 +08:00
|
|
|
|
|
|
|
# Borra todos los correos de una Frase específica
|
|
|
|
def deletePhrase(db_file, phrase):
    """Delete, after confirmation, every stored e-mail whose phrase equals *phrase*.

    The user is asked Y/N; any other answer repeats the question. Whatever
    happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
        phrase: Phrase to match exactly.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a phrase
        # containing a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE phrase = ?", (phrase,)
        ).fetchone()[0]

        if total == 0:
            print("There are no emails to erase")
            input("Press enter to continue")
        else:
            # Invalid answers used to call deleteUrl(db_file, phrase), which
            # treated the phrase as a URL. Re-asking in a loop keeps the
            # operation on the phrase column.
            while True:
                option = str(input("Are you sure you want to delete " + str(total) + " emails? Y/N :"))

                if option in ("Y", "y"):
                    c.execute("DELETE FROM emails WHERE phrase = ?", (phrase,))
                    conn.commit()
                    print("Emails deleted")
                    input("Press enter to continue")
                    break

                if option in ("N", "n"):
                    print("Canceled operation, return to the menu ...")
                    time.sleep(2)
                    break

                print("Select a correct option")
                time.sleep(2)
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    menu()
|
2018-02-12 23:16:07 +08:00
|
|
|
|
2018-02-11 10:15:16 +08:00
|
|
|
# Borra todos los correos
|
|
|
|
def deleteAll(db_file):
    """Delete, after confirmation, every stored e-mail.

    After deleting the rows the table is dropped and recreated through
    ``crearTabla(db_file, True)``.
    NOTE(review): presumably done to reset the AUTOINCREMENT counter - confirm.

    The user is asked Y/N; any other answer repeats the question. Whatever
    happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        total = c.execute('SELECT COUNT(*) FROM emails').fetchone()[0]

        if total == 0:
            print("There are no emails to erase")
            input("Press enter to continue")
        else:
            # A loop replaces the former self-recursion, which opened one
            # more connection for every invalid answer.
            while True:
                option = str(input("Are you sure you want to delete " + str(total) + " emails? Y/N :"))

                if option in ("Y", "y"):
                    c.execute("DELETE FROM emails")
                    conn.commit()
                    # Recreate the table in the database we were given; the
                    # file name used to be hard-coded to "Emails.db".
                    crearTabla(db_file, True)
                    print("All emails were deleted")
                    input("Press enter to continue")
                    break

                if option in ("N", "n"):
                    print("Canceled operation, return to the menu ...")
                    time.sleep(2)
                    break

                print("Select a correct option")
                time.sleep(2)
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    menu()
|
|
|
|
|
2018-02-09 01:12:58 +08:00
|
|
|
# Lista correos por frase
|
2018-02-13 01:30:46 +08:00
|
|
|
def listarPorFrase(db_file):
    """Ask for a phrase and print every stored e-mail whose phrase contains it.

    The match is a SQL ``LIKE`` substring match on the ``phrase`` column.
    Whatever happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
    """
    conn = None
    try:
        phrase = str(input("Inserter phrase: "))
        pattern = "%" + phrase.strip() + "%"

        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a phrase
        # containing a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE phrase LIKE ?", (pattern,)
        ).fetchone()[0]

        if total == 0:
            # The message used to say "url", copied from the URL listing.
            print("No results for the specified phrase")
            input("Press enter to continue")
        else:
            c.execute("SELECT * FROM emails WHERE phrase LIKE ?", (pattern,))
            # Row layout: (ID, phrase, email, url).
            for row in c:
                print("")
                print("Number: " + str(row[0]))
                print("Phrase: " + str(row[1]))
                print("Email: " + str(row[2]))
                print("Url: " + str(row[3]))
                print("-------------------------------------------------------------------------------")

            print("")
            input("Press enter key to continue")
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when the prompt or connect() failed.
        if conn is not None:
            conn.close()

    menu()
|
2018-02-08 11:15:41 +08:00
|
|
|
|
2018-02-13 01:30:46 +08:00
|
|
|
# Lista correos por URL
|
2018-02-11 11:16:04 +08:00
|
|
|
def listarPorUrl(db_file):
    """Ask for a URL and print every stored e-mail whose URL contains it.

    The match is a SQL ``LIKE`` substring match on the ``url`` column.
    Whatever happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
    """
    conn = None
    try:
        print("Example URL: http://www.pythondiario.com ")
        url = str(input("Insert a Url: "))
        pattern = "%" + url.strip() + "%"

        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        # Bound parameter instead of string concatenation: a URL containing
        # a quote can no longer break or alter the statement.
        total = c.execute(
            "SELECT COUNT(*) FROM emails WHERE url LIKE ?", (pattern,)
        ).fetchone()[0]

        if total == 0:
            print("No results for the specified url")
            input("Press enter to continue")
        else:
            c.execute("SELECT * FROM emails WHERE url LIKE ?", (pattern,))
            # Row layout: (ID, phrase, email, url).
            for row in c:
                print("")
                print("Number: " + str(row[0]))
                print("Phrase: " + str(row[1]))
                print("Email: " + str(row[2]))
                print("Url: " + str(row[3]))
                print("-------------------------------------------------------------------------------")

            print("")
            input("Press enter key to continue")
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when the prompt or connect() failed.
        if conn is not None:
            conn.close()

    menu()
|
2018-02-11 11:16:04 +08:00
|
|
|
|
2018-02-08 12:53:25 +08:00
|
|
|
# Lista todos los correos
|
|
|
|
def listarTodo(db_file):
    """Print every stored e-mail.

    Whatever happens, the function finishes by re-entering ``menu()``.

    Args:
        db_file: Path of the SQLite database holding the ``emails`` table.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()
        total = c.execute('SELECT COUNT(*) FROM emails').fetchone()[0]

        if total == 0:
            print("The data base is Empty")
            input("Press enter to continue")
        else:
            # This part used to run even for an empty database once the
            # nested menu() call returned, because there was no ``else``.
            c.execute("SELECT * FROM emails")
            # Row layout: (ID, phrase, email, url).
            for row in c:
                print("")
                print("Number: " + str(row[0]))
                print("Phrase: " + str(row[1]))
                print("Email: " + str(row[2]))
                print("Url: " + str(row[3]))
                print("-------------------------------------------------------------------------------")

            print("")
            input("Press enter key to continue")
    except Error as e:
        print(e)
        input("Press enter to continue")
    finally:
        # ``conn`` is None when connect() itself failed.
        if conn is not None:
            conn.close()

    menu()
|
2018-02-08 11:15:41 +08:00
|
|
|
|
2018-02-13 01:30:46 +08:00
|
|
|
# Extrae los correos de una única URL
|
2018-02-09 08:28:13 +08:00
|
|
|
def extractOnlyUrl(url):
    """Download one page and store every e-mail address found in it.

    Each distinct address is printed with a running number and, when
    ``searchEmail`` reports no existing row for it under the phrase
    "Especific Search", inserted into "Emails.db" together with *url*.

    Args:
        url: Address of the page to scan.

    Nothing is raised to the caller: Ctrl+C and any other exception are
    reported, then ``menu()`` is re-entered.
    """
    try:
        print("Searching emails... please wait")

        req = urllib.request.Request(
            url,
            data=None,
            headers={'User-Agent': ua.random},
        )

        try:
            conn = urllib.request.urlopen(req, timeout=10)
        except timeout as err:
            raise ValueError('Timeout ERROR') from err
        except (HTTPError, URLError) as err:
            raise ValueError('Bad Url...') from err

        # ``with`` closes the HTTP response; it used to stay open.
        with conn:
            status = conn.getcode()
            contentType = conn.info().get_content_type()
            if status != 200 or contentType == "audio/mpeg":
                raise ValueError('Bad Url...')

            # get_content_charset() returns None when the server sends no
            # charset, and bytes.decode(None) raises TypeError - fall back
            # to UTF-8 and replace undecodable bytes instead.
            charset = conn.headers.get_content_charset() or "utf-8"
            html = conn.read().decode(charset, errors="replace")

        emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}', html)

        # A set gives constant-time duplicate checks; its size is also the
        # running number of distinct addresses.
        seen = set()
        for email in emails:
            # Skip duplicates and image file names such as "logo@2x.png".
            if email in seen or email.endswith(imageExt):
                continue
            seen.add(email)
            print(str(len(seen)) + " - " + email)
            if searchEmail("Emails.db", email, "Especific Search") == 0:
                insertEmail("Emails.db", email, "Especific Search", url)

        print("")
        print("***********************")
        print(str(len(seen)) + " emails were found")
        print("***********************")

    except KeyboardInterrupt:
        input("Press return to continue")
        menu()

    except Exception as e:
        print(e)
        input("Press enter to continue")
        menu()
|
2018-02-09 08:28:13 +08:00
|
|
|
|
2018-02-08 08:03:48 +08:00
|
|
|
# Extrae los correos de una Url - 2 niveles
|
|
|
|
def extractUrl(url):
    """Scan a page and every absolute link on it (two levels) for e-mails.

    The root page must load, otherwise the error is reported and ``menu()``
    is re-entered. Linked pages are best effort: a link that cannot be
    fetched is reported and skipped. Every address found is stored with the
    root *url*, not with the linked page it came from.

    Args:
        url: Address of the root page.

    Nothing is raised to the caller; the function always ends by prompting
    the user and re-entering ``menu()``.
    """
    print("Searching emails... please wait")
    print("This operation may take several minutes")

    try:
        seen = set()

        try:
            html = _fetch_text(url)
        except timeout as err:
            raise ValueError('Timeout ERROR') from err
        except (HTTPError, URLError) as err:
            raise ValueError('Bad Url...') from err

        print("Searching in " + url)
        _record_emails(html, seen, url)

        soup = BeautifulSoup(html, "lxml")
        links = soup.find_all('a')

        print("They will be analyzed " + str(len(links) + 1) + " Urls..." )
        time.sleep(2)

        for tag in links:
            link = tag.get('href', None)
            if link is None:
                continue

            try:
                print("Searching in " + link)
                # Only absolute http(s) links are followed.
                if link[0:4] != 'http':
                    continue
                page = _fetch_text(link)
                _record_emails(page, seen, url)
            except (timeout, HTTPError, URLError, ValueError):
                # The old code printed this and then went on to read a
                # stale or undefined response; now the link is skipped.
                print("Bad Url..")
                time.sleep(2)
            except Exception as err:
                # Keep going on any other error, but say so instead of
                # swallowing it silently.
                print("Skipping link: " + str(err))

        print("")
        print("***********************")
        print("Finish: " + str(len(seen)) + " emails were found")
        print("***********************")
        input("Press return to continue")
        menu()

    except KeyboardInterrupt:
        input("Press return to continue")
        menu()

    except Exception as e:
        print(e)
        input("Press enter to continue")
        menu()


def _fetch_text(target):
    """Download *target* and return its body as text.

    Raises ValueError('Bad Url...') for a non-200 status or an
    "audio/mpeg" body; network errors from urlopen propagate unchanged.
    """
    req = urllib.request.Request(
        target,
        data=None,
        headers={'User-Agent': ua.random},
    )
    # ``with`` closes the HTTP response when the block ends.
    with urllib.request.urlopen(req, timeout=10) as resp:
        if resp.getcode() != 200 or resp.info().get_content_type() == "audio/mpeg":
            raise ValueError('Bad Url...')
        # get_content_charset() returns None when no charset is declared;
        # fall back to UTF-8 and replace undecodable bytes.
        charset = resp.headers.get_content_charset() or "utf-8"
        return resp.read().decode(charset, errors="replace")


def _record_emails(text, seen, source_url):
    """Print and store every address in *text* that is not yet in *seen*.

    *seen* is updated in place; its size is the running number printed in
    front of each address.
    """
    for email in re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", text):
        # Skip duplicates and image file names such as "logo@2x.png".
        if email in seen or email.endswith(imageExt):
            continue
        seen.add(email)
        print(str(len(seen)) + " - " + email)
        if searchEmail("Emails.db", email, "Especific Search") == 0:
            insertEmail("Emails.db", email, "Especific Search", source_url)
|
2018-02-08 08:03:48 +08:00
|
|
|
|
|
|
|
# Extract the e-mails from every URL returned by the search.
# Each result page is fetched and every link on it is scanned as well (2 levels).
def extractFraseGoogle(frase, cantRes):
    """Search for a phrase and harvest e-mail addresses from the results.

    The result URLs yielded by ``search(frase, stop=cantRes)`` are collected
    first. Each one is then downloaded with a random User-Agent, parsed with
    BeautifulSoup, and every ``<a href>`` found on the page is handed to
    ``searchSpecificLink`` together with a shared list of already seen
    e-mails. When the crawl ends (or is interrupted) the user is prompted and
    sent back to ``menu()``.

    Args:
        frase: Search phrase; also passed on to ``searchSpecificLink``.
        cantRes: Value forwarded as ``stop`` to ``search``.
    """
    print ("Searching emails... please wait")
    print ("This operation may take several minutes")
    try:
        listEmails = []

        # Materialise the whole result list before any page is fetched.
        listUrl = list(search(frase, stop=cantRes))

        for i in listUrl:
            try:
                req = urllib.request.Request(
                    i,
                    data=None,
                    headers={
                        'User-Agent': ua.random
                    })
                try:
                    # A timeout keeps one unresponsive host from blocking the
                    # whole crawl; same value as searchSpecificLink uses.
                    conn = urllib.request.urlopen(req, timeout=10)
                except (timeout, HTTPError, URLError):
                    print("Bad Url..")
                    time.sleep(2)
                    # Skip this URL: there is no valid response to read.
                    continue

                # The context manager closes the response in every case.
                with conn:
                    status = conn.getcode()
                    contentType = conn.info().get_content_type()

                    if status != 200 or contentType == "audio/mpeg":
                        print("Bad Url..")
                        time.sleep(2)
                        continue

                    html = conn.read()

                soup = BeautifulSoup(html, "lxml")
                links = soup.find_all('a')

                print("They will be analyzed " + str(len(links) + 1) + " Urls..." )
                time.sleep(2)

                for tag in links:
                    link = tag.get('href', None)
                    if link is not None:
                        # Each link is fetched with its own timeout.
                        searchSpecificLink(link, listEmails, frase)

            except urllib.error.URLError as e:
                print("Problems with the url:" + i)
                print(e)
            except (http.client.IncompleteRead) as e:
                print(e)
            except Exception as e:
                # Best effort: report the problem and go on with the next URL.
                print(e)

        print("")
        print("*******")
        print("Finish")
        print("*******")
        input("Press return to continue")
        menu()

    except KeyboardInterrupt:
        input("Press return to continue")
        menu()

    except Exception as e:
        print(e)
        input("Press enter to continue")
        menu()
|
2019-01-30 08:35:00 +08:00
|
|
|
|
|
|
|
# Extract the list of keywords from a txt file
def extractKeywordsList(txtFile):
    """Print every keyword stored in a text file, one per line.

    The file content is split on newline characters, so a trailing newline in
    the file yields a final empty keyword, which is printed as a blank line.

    Args:
        txtFile: Path of the text file to read.
    """
    # The context manager closes the file even if reading fails.
    with open(txtFile, 'r') as f:
        text = f.read()

    keywordList = text.split(sep='\n')
    for key in keywordList:
        print(key)
|
|
|
|
|
2018-02-08 02:13:19 +08:00
|
|
|
# Clear the screen according to the operating system
def clear():
    """Clear the terminal with the shell command that matches ``os.name``.

    Runs ``clear`` when ``os.name`` is "posix" and ``cls`` when it is "ce",
    "nt" or "dos"; any other value does nothing. If an exception is raised it
    is printed, the user is prompted, and ``menu()`` is shown again.
    """
    try:
        platform_name = os.name
        if platform_name == "posix":
            os.system("clear")
        elif platform_name in ("ce", "nt", "dos"):
            os.system("cls")
    except Exception as err:
        print(err)
        input("Press enter to continue")
        menu()
|
2019-09-14 05:57:55 +08:00
|
|
|
|
|
|
|
def searchSpecificLink(link, listEmails, frase):
    """Download one link and record every new e-mail address found in it.

    Links that do not start with ``http`` are only announced and then
    ignored. For the others the page is fetched with a 10 second timeout and
    scanned with a regular expression. Every match that is not already in
    ``listEmails`` and does not end with one of the ``imageExt`` suffixes is
    appended to ``listEmails``, counted in the module-level
    ``count_email_in_phrase``, printed, and inserted into ``Emails.db`` when
    ``searchEmail`` returns 0 for it.

    Errors are reported on stdout and never propagated, so one bad link does
    not abort the caller's crawl.

    Args:
        link: The ``href`` value to inspect.
        listEmails: List of e-mails already seen; extended in place.
        frase: Search phrase stored alongside each e-mail.
    """
    global count_email_in_phrase

    try:
        print("Searching in " + link)
        if not link.startswith('http'):
            return

        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(link, timeout=10) as f:
            # errors='replace' keeps pages that are not valid UTF-8 usable
            # instead of discarding them on the first undecodable byte.
            s = f.read().decode('utf-8', errors='replace')

        emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
        for email in emails:
            # Names such as "logo@2x.png" match the pattern; the suffix check
            # filters them out.
            if email not in listEmails and not email.endswith(imageExt):
                count_email_in_phrase += 1
                listEmails.append(email)
                print(str(count_email_in_phrase) + " - " + email)
                if searchEmail("Emails.db", email, frase) == 0:
                    insertEmail("Emails.db", email, frase, link)

    # Keep going if any error occurs
    except (HTTPError, URLError) as e:
        print(e)
    except timeout:
        # The original passed the link as a second print() argument, which
        # left a literal "%s" in the message.
        print('socket timed out - URL %s' % link)
    except (http.client.IncompleteRead) as e:
        print(e)
    except Exception as e:
        print(e)
|
2018-02-08 02:13:19 +08:00
|
|
|
|
2022-07-13 22:19:21 +08:00
|
|
|
def guardarCsv(db_file):
    """Export the ``emails`` table of a SQLite database to a CSV file.

    The file is created in the current working directory and named
    ``csvemails_<year>_<month>_<day>_<hour>_<minute>_<second>.csv``. It starts
    with the header ``Phrase, Email, Url`` followed by columns 1, 2 and 3 of
    every row returned by ``SELECT * FROM emails``. Any error is printed;
    afterwards the user is prompted and sent back to ``menu()``.

    Args:
        db_file: Path of the SQLite database to read.
    """
    # Start as None so the cleanup below is safe when connect() itself fails.
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        c = conn.cursor()

        nameFile = datetime.now().strftime('csvemails_%Y_%m_%d_%H_%M_%S.csv')
        print("")
        print("Creating csv, please wait...")

        # The context manager flushes and closes the CSV even on error.
        with open(nameFile, "w", newline="") as f:
            writer = csv.writer(f)

            header = ['Phrase', 'Email', 'Url']
            writer.writerow(header)

            c.execute('SELECT * FROM emails')

            for i in c:
                row = [str(i[1]), str(i[2]), str(i[3])]
                writer.writerow(row)

    except Exception as e:
        # Covers sqlite3 errors as well as file-system errors; both are
        # reported the same way.
        print(e)

    finally:
        if conn is not None:
            conn.close()

    # Return to the menu only after the resources are released, so the
    # database is not held open while the menu is active.
    input("Press enter to continue")
    menu()
|
|
|
|
|
2018-02-08 11:49:02 +08:00
|
|
|
# Program start
def Main():
    """Clear the screen, call ``crearTabla`` for Emails.db and open the menu."""
    database_name = "Emails.db"
    clear()
    crearTabla(database_name, False)
    menu()


Main()
|