1+ import os
2+ import requests
3+ import hashlib
4+ import pandas as pd
5+
class virtual_host_scanner(object):
    """Virtual host scanning class.

    Issues one HTTP request per wordlist entry against the target, varying
    only the ``Host`` header, and records responses that are not explicitly
    ignored. Responses are hashed so near-duplicate catch-all pages can be
    filtered out later by likely_matches().

    Attributes:
        wordlist: location of a wordlist file to use with scans
        target: the target for scanning
        port: the port to scan. Defaults to 80
        ignore_http_codes: comma separated list of http codes to ignore
        ignore_content_length: integer value of content length to ignore
        output: folder to write output file to
    """

    def __init__(self, target, output, port=80, unique_depth=1,
                 ignore_http_codes='404', ignore_content_length=0,
                 wordlist="./wordlists/virtual-host-scanning.txt"):
        self.target = target
        self.output = output + '/' + target + '_virtualhosts.txt'
        self.port = port
        # Parse "404, 500"-style input into a list of ints for membership tests.
        self.ignore_http_codes = list(map(int, ignore_http_codes.replace(' ', '').split(',')))
        self.ignore_content_length = ignore_content_length
        self.wordlist = wordlist
        self.unique_depth = unique_depth

        self.completed_scan = False
        # Each entry is "hostname,<sha256 hex of response body>" (see scan()).
        self.results = []

    def scan(self):
        """Run the virtual host scan and collect results into self.results.

        Prints each hit (and its response headers) as it is found. Hosts
        matching an ignored HTTP code or the ignored content length are
        skipped, as are hosts that fail to respond.
        """
        print("[+] Starting virtual host scan for %s using port %s and wordlist %s" % (self.target, str(self.port), self.wordlist))
        print("[>] Ignoring HTTP codes: %s" % (self.ignore_http_codes))
        if self.ignore_content_length > 0:
            print("[>] Ignoring Content length: %s" % (self.ignore_content_length))

        if not os.path.exists(self.wordlist):
            print("[!] Wordlist %s doesn't exist, ending scan." % self.wordlist)
            return

        # Use a context manager so the wordlist handle is not leaked.
        with open(self.wordlist) as wordlist_file:
            virtual_host_list = wordlist_file.read().splitlines()

        for virtual_host in virtual_host_list:
            # Wordlist entries may embed a %s placeholder for the base target.
            hostname = virtual_host.replace('%s', self.target)

            headers = {
                'Host': hostname if self.port == 80 else '{}:{}'.format(hostname, self.port),
                'Accept': '*/*'
            }

            # todo: to be made redundant/replaced with a --ssl flag? Current implementation limits ssl severely
            dest_url = '{}://{}:{}/'.format('https' if int(self.port) == 443 else 'http', self.target, self.port)

            try:
                # timeout prevents one unresponsive host from hanging the
                # whole scan; a timed-out host is treated like any other
                # request failure and skipped.
                res = requests.get(dest_url, headers=headers, verify=False, timeout=10)
            except requests.exceptions.RequestException:
                continue

            if res.status_code in self.ignore_http_codes:
                continue

            # Bug fix: Content-Length may be absent, in which case the old
            # code crashed with int(None). Only compare when it is present.
            content_length = res.headers.get('content-length')
            if (self.ignore_content_length > 0 and content_length is not None
                    and self.ignore_content_length == int(content_length)):
                continue

            # hash the page results to aid in identifying unique content
            page_hash = hashlib.sha256(res.text.encode('utf-8')).hexdigest()
            output = '[#] Found: {} (code: {}, length: {}, hash: {})'.format(hostname, res.status_code,
                                                                             res.headers.get('content-length'), page_hash)

            # print current results
            print(output)
            for key, val in res.headers.items():
                output = ' {}: {}'.format(key, val)
                print(output)

            # add url and hash into array for likely matches
            self.results.append(hostname + ',' + page_hash)

        self.completed_scan = True

    def likely_matches(self):
        """Return hostnames whose response-body hash is shared by at most
        self.unique_depth results.

        Catch-all virtual hosts return the same page for many hostnames and
        therefore share a hash; requiring a low occurrence count filters
        those out. Returns an empty list (and prints a notice) when no scan
        has completed yet, so callers can always iterate the result.
        """
        if self.completed_scan is False:
            print("Likely matches cannot be printed as a scan has not yet been run.")
            # Bug fix: return an iterable instead of None so callers doing
            # `for m in scanner.likely_matches()` don't crash.
            return []

        print("\n [#] Most likely matches with a unique count of %s or less:" % self.unique_depth)

        # Map hostname -> page hash (a later duplicate hostname overwrites an
        # earlier one, matching the original dict-building behavior).
        host_to_hash = {}
        for item in self.results:
            parts = item.split(",")
            host_to_hash[parts[0]] = parts[1]

        # Count how many hostnames share each hash, then keep only hostnames
        # in sufficiently small groups. Plain dicts replace the previous
        # pandas DataFrame/groupby round-trip with identical output order.
        hash_counts = {}
        for page_hash in host_to_hash.values():
            hash_counts[page_hash] = hash_counts.get(page_hash, 0) + 1

        return [host for host, page_hash in host_to_hash.items()
                if hash_counts[page_hash] <= self.unique_depth]
0 commit comments