Working urlscan

2025-02-24 14:32:57 +08:00 · 2020-05-23 21:50:40 +01:00 · 2020-05-23 21:50:40 +01:00 · eca7eca44a
commit eca7eca44a
parent 5d7b911ed3
4 changed files with 24 additions and 15 deletions
--- a/.github/workflows/theHarvester.yml
+++ b/.github/workflows/theHarvester.yml
@@ -111,6 +111,10 @@ jobs:
      run: |
        python theHarvester.py -d yale.edu -b twitter
    - name: Run theHarvester module Urlscan
      run: |
        python theHarvester.py -d yale.edu -b urlscan
    - name: Run theHarvester module Virustotal
      run: |
        python theHarvester.py -d yale.edu -b virustotal
--- a/theHarvester/__main__.py
+++ b/theHarvester/__main__.py
@@ -37,8 +37,8 @@ async def start():
    parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, bufferoverun, certspotter, crtsh, dnsdumpster,
                        dogpile, duckduckgo, exalead, github-code, google,
                        hackertarget, hunter, intelx, linkedin, linkedin_links, netcraft, otx, pentesttools,
-                        urlscan, rapiddns, securityTrails, spyse, sublist3r, suip, threatcrowd,
+                        rapiddns, securityTrails, spyse, sublist3r, suip, threatcrowd,
-                        trello, twitter, virustotal, yahoo, all''')
+                        trello, twitter, urlscan, virustotal, yahoo, all''')
    args = parser.parse_args()
    try:
@@ -300,14 +300,6 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                    except Exception as e:
                        print(e)
                elif engineitem == 'urlscan':
                    from theHarvester.discovery import urlscan
                    try:
                        urlscan_search = urlscan.SearchUrlscan(word)
                        stor_lst.append(store(urlscan_search, engineitem, store_host=True))
                    except Exception as e:
                        print(e)
                elif engineitem == 'pentesttools':
                    from theHarvester.discovery import pentesttools
                    try:
@@ -381,6 +373,14 @@ async def store(search_engine: Any, source: str, process_param: Any = None, stor
                    twitter_search = twittersearch.SearchTwitter(word, limit)
                    stor_lst.append(store(twitter_search, engineitem, store_people=True))
                elif engineitem == 'urlscan':
                    from theHarvester.discovery import urlscan
                    try:
                        urlscan_search = urlscan.SearchUrlscan(word)
                        stor_lst.append(store(urlscan_search, engineitem, store_host=True, store_ip=True))
                    except Exception as e:
                        print(e)
                elif engineitem == 'virustotal':
                    from theHarvester.discovery import virustotal
                    virustotal_search = virustotal.SearchVirustotal(word)
--- a/theHarvester/discovery/__init__.py
+++ b/theHarvester/discovery/__init__.py
@@ -25,6 +25,7 @@
           'threatcrowd',
           'trello',
           'twittersearch',
           'urlscan',
           'virustotal',
           'yahoosearch',
           ]
--- a/theHarvester/discovery/urlscan.py
+++ b/theHarvester/discovery/urlscan.py
@@ -3,20 +3,24 @@
 class SearchUrlscan:
    def __init__(self, word):
        self.word = word
        self.totalhosts = list
        self.totalips = list
        self.proxy = False
    async def do_search(self):
        url = f'https://urlscan.io/api/v1/search/?q=domain:{self.word}'
        response = await AsyncFetcher.fetch_all([url], json=True, proxy=self.proxy)
-        self.totalhosts: set = {host['domain'] for host in response[0]}
+        resp = response[0]
-        print(self.totalhosts)
+        self.totalhosts = {f"{page['page']['domain']}" for page in resp['results']}
        self.totalips = {f"{page['page']['ip']}" for page in resp['results'] if 'ip' in page['page'].keys()}
-    # async def get_hostnames(self) -> Type[list]:
+    async def get_hostnames(self) -> Type[list]:
-    #     return self.totalhosts
+        return self.totalhosts
    async def get_ips(self) -> Type[list]:
        return self.totalips
    async def process(self, proxy=False):
        self.proxy = proxy