#!/usr/bin/python
# -*- coding: utf-8 -*-
# Comentarios en Español
# Blog: www.pythondiario.com

from googlesearch import search
from bs4 import BeautifulSoup
import urllib.request
import random
import os
import time
import sqlite3
from sqlite3 import Error
import sys
import re

# Menú Principal
def menu():
	try:
		clear()
		print ("###################################################################")
		print ("#                                                                 #")
		print ("#                       EMAIL EXTRACTOR                           #")
		print ("#                                                                 #")
		print ("###################################################################")
		print ("")
		print ("              ENGLISH             -               ESPAÑOL          ")
		print ("-------------------------------------------------------------------")
		print ("1 - Search only in the entered URL - Buscar solo en la URL ingresada")
		print ("2 - Search in a url (Two Levels) - Buscar en una URL(Dos Niveles) **Enter the url and the ones you find inside**")
		print ("3 - Search phrase in google - Buscar frase en Google")
		print ("4 - List emails - Listar correos")
		print ("5 - Save emails in .txt file - Guardar correos en archivo .txt")
		print ("6 - Exit - Salir")
		print ("")

		opcion = input("Enter option - Ingrese Opcion: ")
		if (opcion == "1"):
			print ("Example URL: http://www.pythondiario.com")
			url = str(input("Enter URL - Ingrese URL: "))
			extractOnlyUrl(url)
			input("Press enter key to continue")
			menu()

		if (opcion == "2"):
			print ("Example URL: http://www.pythondiario.com")
			url = str(input("Enter URL - Ingrese URL: "))
			extractUrl(url)
			input("Press enter key to continue")
			menu()

		elif (opcion == "3"):
			frase = str(input("Enter a phrase to search - Ingrese una frase a buscar: "))
			print ("***Warning: The amount of results chosen impacts the execution time***")
			print ("*** Advertencia: La cantidad de resultados elejidos impacta el tiempo de ejecucion")
			cantRes = int(input("Number of results in Google - Cantiad de resultados en Google: ")) 
			print ("")
			extractFraseGoogle(frase, cantRes)
			input("Press enter key to continue")
			menu()
		
		elif (opcion == "4"):
			print ("")
			print ("1 - Select a phrase - Seleccionar una frase")
			print ("2 - All emails - Todos los correos")
			opcListar = input("Enter option - Ingrese Opcion: ")
			
			if (opcListar == "1"):
				listarPorFrase()

			elif (opcListar == "2"):
				listarTodo("Emails.db")

		elif (opcion == "5"):
			print ("")
			print ("1 - Save emails from a phrase - Guardar correos de una frase")
			print ("2 - Save all emails - Guardar todos los correos")
		
		elif (opcion == "6"):
			sys.exit(0)

		else:			
			print ("Select a correct option - Seleccione un opcion correcta")
			time.sleep(2)
			clear()
			menu()
		
	except Exception as e:
		print (e)

# Insertar correo, frase y Url en base de datos
def insertEmail(db_file, email, frase, url):
	try:
		conn = sqlite3.connect(db_file)
		c = conn.cursor()
		c.execute("INSERT INTO emails (frase, email, url) VALUES (?,?,?)", (frase, email, url))
		conn.commit()
		conn.close()

	except Error as e:
		print(e)
	finally:
		conn.close()

# Buscar correo en la base de datos
def searchEmail(db_file):
	pass
    #try:
       #conn = sqlite3.connect(db_file)
	#except Error as e:
		#print(e)
	#finally:
		# conn.close()

# Crea tabla principal		
def crearTabla(db_file):
	try:
		conn = sqlite3.connect(db_file)
		c = conn.cursor()
		
		#c.execute('drop table if exists emails')

		sql = '''create table if not exists emails 
				(ID INTEGER PRIMARY KEY AUTOINCREMENT,
				 frase varchar(500) NOT NULL,
				 email varchar(200) NOT NULL,
				 url varchar(500) NOT NULL)'''

		c.execute(sql)
		c.close()

	except Error as e:
		print(e)
	finally:
		conn.close()

# Lista correos por frase
def listarPorFrase():
	pass

# Lista todos los correos
def listarTodo(db_file):
	try:
		conn = sqlite3.connect(db_file)
		c = conn.cursor()
		c.execute("SELECT * FROM emails")

		for i in c:

			print ("")
			print ("Number: " + str(i[0]))
			print ("Search: " + str(i[1]))
			print ("Email: " + str(i[2]))
			print ("Url: " + str(i[3]))
			print ("-------------------------------------------------------------------------------")

		c.close()
		
		print ("")
		input("Press enter key to continue")
		menu()

	except Error as e:
		print(e)
	finally:
		conn.close()


def extractOnlyUrl(url):
	try:
		count = 0
		conn = urllib.request.urlopen(url)

		html = conn.read().decode('utf-8')

		print ("Searching emails... please wait")

		emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", html)

		for email in emails:
			count += 1
			print(str(count) + " - " + email)

	except Exception as e:
		print (e)

# Extrae los correos de una Url - 2 niveles
def extractUrl(url):
	print ("Searching emails... please wait")
	print ("This operation may take several minutes")
	try:
		count = 0

		listUrl = []

		conn = urllib.request.urlopen(url)

		html = conn.read()

		soup = BeautifulSoup(html, "lxml")
		links = soup.find_all('a')

		for tag in links:
			link = tag.get('href', None)
			if link is not None:
				try:
					#listUrl.append(link)
					print ("Searching in " + link)
					if(link[0:4] == 'http'):
						f = urllib.request.urlopen(link)
						s = f.read().decode('utf-8')
						emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}", s)
						for email in emails:
							count += 1
							print(str(count) + " - " + email)
				# Sigue si existe algun error
				except Exception:
					pass
		

	except Exception as e:
		print(e)

# Extrae los correos de todas las Url encontradas en las busquedas
# De cada Url extrae los correo - 2 niveles
def extractFraseGoogle(frase, cantRes):
	try:
		listUrl = []

		for url in search(frase, stop=cantRes):
			listUrl.append(url)

		for i in listUrl:
			conn = urllib.request.urlopen(i)

			html = conn.read()

			soup = BeautifulSoup(html, "lxml")
			links = soup.find_all('a')

			for tag in links:
				link = tag.get('href', None)
				if link is not None:
					print (link)

	except Exception as e:
		print(e)

# Limpia la pantalla según el sistema operativo
def clear():
	try:
		if os.name == "posix":
			os.system("clear")
		elif os.name == "ce" or os.name == "nt" or os.name == "dos":
			os.system("cls")
	except Exception as e:
		print(e)

# Inicio de Programa
def Main():
	clear()
	crearTabla("Emails.db")	
	menu()
	insertEmail("Emails.db", "Programadores en Uruguay", "prueba@gmail.com", "www.pythondiario.com")

Main()