2018-12-28 08:49:56 +08:00
|
|
|
from discovery.constants import *
|
2019-01-06 17:50:07 +08:00
|
|
|
from lib.core import *
|
2019-01-11 10:09:47 +08:00
|
|
|
from parsers import myparser
|
|
|
|
import requests
|
2018-12-28 08:49:56 +08:00
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
|
|
class search_twitter:
    """Collect Twitter handles by scraping Google search result pages.

    The search is restricted to ``site:twitter.com`` pages whose title
    contains ``"on Twitter"`` and that match the supplied search word.
    Raw result pages are accumulated in ``totalresults`` and later handed
    to ``myparser.Parser`` for handle extraction.
    """

    def __init__(self, word, limit):
        """Store the search term and paging configuration.

        Args:
            word: Search term; spaces are replaced with ``%20`` so the term
                can be appended to the query URL as-is.
            limit: Upper bound for the ``start`` offset used while paging
                (anything accepted by ``int()``).
        """
        self.word = word.replace(' ', '%20')
        self.results = ""        # body of the most recently fetched page
        self.totalresults = ""   # concatenation of every fetched page
        self.server = 'www.google.com'
        self.hostname = 'www.google.com'
        self.quantity = '100'
        self.limit = int(limit)
        self.counter = 0         # current ``start`` offset of the search

    def do_search(self):
        """Fetch one result page at the current offset and accumulate it.

        On a request failure the error is printed and the method returns
        without touching ``results`` / ``totalresults``; previously the
        code fell through and crashed on the unbound response variable.
        """
        url = (
            f'https://{self.server}/search?num=100&start={self.counter}'
            '&hl=en&meta=&q=site%3Atwitter.com%20intitle%3A%22on+Twitter%22%20'
            f'{self.word}'
        )
        headers = {'User-Agent': Core.get_user_agent()}
        try:
            response = requests.get(url, headers=headers)
        except Exception as e:
            # Deliberately best-effort: report the failure and skip this
            # page instead of aborting the whole search run.
            print(e)
            return
        self.results = response.text
        self.totalresults += self.results

    @staticmethod
    def _clean_handle(raw):
        """Normalize one parsed handle; return '' when it is unusable.

        Cleans up values that look like ``@user other_output``,
        ``@user.`` or ``@user...more``.
        """
        handle = str(raw).strip()
        if len(handle) <= 2:
            return ''
        # Keep only the first space-separated token.
        handle = handle.split(' ')[0]
        # Strip off a single period at the end if it exists.
        if handle.endswith('.'):
            handle = handle[:-1]
        # An ellipsis marks truncated text: cut at the first period.
        if '...' in handle:
            handle = handle[:handle.index('.')]
        return handle

    def get_people(self):
        """Return the set of cleaned handles found in the fetched pages.

        Returns:
            set[str]: Unique handles. A bare ``'@'`` and empty strings
            (which the cleanup can produce for inputs such as
            ``'...abc'``) are excluded.
        """
        rawres = myparser.Parser(self.totalresults, self.word)
        # NOTE(review): people_twitter() is assumed to return an iterable
        # of handle-like values -- confirm against the parser module.
        handles = {self._clean_handle(item) for item in rawres.people_twitter()}
        handles.discard('')
        handles.discard('@')
        return handles

    def process(self):
        """Page through the results in steps of 100 until ``limit`` is hit.

        Sleeps for ``getDelay()`` seconds after every request.
        """
        while self.counter < self.limit:
            self.do_search()
            time.sleep(getDelay())
            self.counter += 100
            print(f'\tSearching {self.counter} results.')
|