#!/usr/bin/env python """ Grabber Core v0.1 Copyright (C) 2006 - Romain Gaucher - http://rgaucher.info """ from BeautifulSoup import BeautifulSoup from xml.sax import * # Need PyXML [http://pyxml.sourceforge.net/] from optparse import OptionParser from urllib2 import URLError, HTTPError import urllib import time import re,sys,os # Personal libraries from spider import database, database_css, database_js from spider import spider, cj, allowedExtensions COOKIEFILE = 'cookies.lwp' # the path and filename that you want to use to save your cookies in import os.path txdata = None refererUrl = "http://google.com/?q=grabber" txheaders = {'User-agent' : 'Grabber/0.1 (X11; U; Linux i686; en-US; rv:1.7)', 'Referer' : refererUrl} import cookielib import urllib2 urlopen = urllib2.urlopen Request = urllib2.Request def normalize_whitespace(text): return ' '.join(text.split()) def clear_whitespace(text): return text.replace(' ','') # Configuration variables confFile = False confUrl = "" confSpider = False confActions = [] confInfos = {} # Handle the XML file with a SAX Parser class ConfHandler(ContentHandler): def __init__(self): global confFile confFile = True self.inSite = False self.inScan = False self.inSpider = False self.inUrl = False self.inAction = False self.string = "" self.listActions = ["crystal", "sql","bsql","xss","include","backup","javascript", "session"] def startElement(self, name, attrs): global confUrl,confInfos self.string = "" if name == 'site': self.inSite = True if name == 'spider' and self.inSite: self.inSpider = True if name == 'scan' and self.inSite: self.inScan = True elif self.inSite and name == 'url': self.inUrl = True confUrl = "" elif self.inScan and name in self.listActions: self.inAction = True if 'info' in attrs.keys(): confInfos[name] = attrs.getValue('info') def characters(self, ch): if self.inSite: self.string = self.string + ch def endElement(self, name): global confUrl,confActions,confSpider if name == 'url' and self.inUrl: self.inUrl = False confUrl = normalize_whitespace(self.string) if name == 'spider' and self.inSpider: self.inSpider = False confSpider = clear_whitespace(self.string) if name in self.listActions and self.inScan and not name in confActions: confActions.append(name) if name == 'site' and self.inSite: self.inSite = False attack_list = { } # Handle the XML file with a SAX Parser class AttackHandler(ContentHandler): def __init__(self): global attack_list attack_list = {} self.inElmt = False self.inCode = False self.inName = False self.sName = "" self.code = "" def startElement(self, name, attrs): if name == 'attack': self.inElmt = True elif name == 'code': self.inCode = True self.code = "" elif name == "name": self.inName = True self.sName = "" def characters(self, ch): if self.inCode: self.code = self.code + ch elif self.inName: self.sName = self.sName + ch def endElement(self, name): global attack_list if name == 'code': self.inCode = False self.code = normalize_whitespace(self.code) if name == 'name': self.inName = False self.sName = normalize_whitespace(self.sName) if name == 'attack': self.inElmt = False # send the plop in the dictionnary if not (self.sName in attack_list.keys()): attack_list[self.sName] = [] attack_list[self.sName].append(self.code) class LogHandler: def __init__(self, fileName): self.stream = None try: self.stream = open(fileName, 'w') except IOError: print "Error during the construction of the log system" return self.stream.write("# Log from Grabber.py\n") def __le__(self, string): self.stream.write(string + '\n') self.stream.flush() def __del__(self): self.stream.close() log = LogHandler('grabber.log') def unescape(s): """ Unescaping the HTML special characters """ s = s.replace("<", "<") s = s.replace(">", ">") s = s.replace(""", "\"") s = s.replace("'","'") s = s.replace("&", "&") return s def single_urlencode(text): """single URL-encode a given 'text'. Do not return the 'variablename=' portion.""" blah = urllib.urlencode({'blahblahblah':text}) #we know the length of the 'blahblahblah=' is equal to 13. This lets us avoid any messy string matches blah = blah[13:] blah = blah.replace('%5C0','%00') return blah def getContent_GET(url,param,injection): global log """ Get the content of the url by GET method """ newUrl = url ret = None if url.find('?') < 0: if url[len(url)-1] != '/' and not allowedExtensions(url): url += '/' newUrl = url + '?' + param + '=' + single_urlencode(str(injection)) else: newUrl = url + '&' + param + '=' + single_urlencode(str(injection)) try: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) log <= ( newUrl) req = Request(newUrl, None, txheaders) # create a request object ret = urlopen(req) # and open it to return a handle on the url ret = urlopen(req) # and open it to return a handle on the url except HTTPError, e: log <= ( 'The server couldn\'t fulfill the request.') log <= ( 'Error code: %s' % e.code) return None except URLError, e: log <= ( 'We failed to reach a server.') log <= ( 'Reason: %s' % e.reason) return None except IOError: log <= ( "Cannot open: %s" % url) return None return ret def getContentDirectURL_GET(url, string): global log """ Get the content of the url by GET method """ ret = "" try: if len(string) > 0: if url[len(url)-1] != '/' and url.find('?') < 0 and not allowedExtensions(url): url += '/' url = url + "?" + (string) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) log <= ( url) req = Request(url, None, txheaders) # create a request object ret = urlopen(req) # and open it to return a handle on the url except HTTPError, e: log <= ( 'The server couldn\'t fulfill the request.') log <= ( 'Error code: %s' % e.code) return None except URLError, e: log <= ( 'We failed to reach a server.') log <= ( 'Reason: %s' % e.reason) return None except IOError: log <= ( "Cannot open: %s" % url) return None return ret def getContent_POST(url,param,injection): global log """ Get the content of the url by POST method """ txdata = urllib.urlencode({param: injection}) ret = None try: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) log <= ( url) log <= ( txdata) req = Request(url, txdata, txheaders) # create a request object ret = urlopen(req) # and open it to return a handle on the url ret = urlopen(req) # and open it to return a handle on the url except HTTPError, e: log <= ( 'The server couldn\'t fulfill the request.') log <= ( 'Error code: %s' % e.code) return None except URLError, e: log <= ( 'We failed to reach a server.') log <= ( 'Reason: %s' % e.reason) return None except IOError: log <= ( "Cannot open: %s" % url) return None return ret def getContentDirectURL_POST(url,allParams): global log """ Get the content of the url by POST method """ txdata = urllib.urlencode(allParams) ret = None try: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) log <= ( url) log <= ( txdata) req = Request(url, txdata, txheaders) # create a request object ret = urlopen(req) # and open it to return a handle on the url ret = urlopen(req) # and open it to return a handle on the url except HTTPError, e: log <= ( 'The server couldn\'t fulfill the request.') log <= ( 'Error code: %s' % e.code) return None except URLError, e: log <= ( 'We failed to reach a server.') log <= ( 'Reason: %s' % e.reason) return None except IOError: log <= ( "Cannot open: %s" % url) return None return ret # Levenstein distance def ld(a, b): # stolen from m.l. hetland n, m = len(a), len(b) if n > m: # Make sure n <= m, to use O(min(n,m)) space a,b = b,a n,m = m,n current = xrange(n+1) for i in xrange(1,m+1): previous, current = current, [i]+[0] * m for j in xrange(1, n+1): add, delete = previous[j] + 1, current[j-1] + 1 change = previous[j-1] if a[j-1] != b[i-1]: change +=1 current[j] = min(add, delete, change) return current[n] def partially_in(object, container, url = "IMPOSSIBLE_GRABBER_URL", two_long = False): """ Crappy decision function Is an object partially in a text ? """ try: if object in container and url not in container: return True except TypeError: return False if not two_long: # load the big engine... dist = ld(object, container) # espilon ~ len(object) / 4 b1 = int(len(object) - len(object) / 4) b2 = int(len(object) + len(object) / 4) # diff of the original text and the given text length = abs(len(container)- dist) if b1 < length and length < b2: return True else: # load the big engine... dist = ld(object, container) # espilon ~ len(object) / (len(object) + 1) b1 = int(len(object) - len(object) / (len(object) + 1)) b2 = int(len(object) + len(object) / (len(object) + 1)) # diff of the original text and the given text length = abs(len(container)- dist) if b1 < length and length < b2: return True return False def load_definition(fileName): """ Load the XML of definition """ global attack_list attack_list = {} parser = make_parser() xss_handler = AttackHandler() # Tell the parser to use our handler parser.setContentHandler(xss_handler) parser.parse(fileName) def setDatabase(localDatabase): global database database = {} database = localDatabase def investigate(url, what = "xss"): global attack_list """ Cross-Site Scripting Checking Injection Blind Injection """ localDB = None if what == "xss": from xss import process load_definition('xssAttacks.xml') localDB = database elif what == "sql": from sql import process load_definition('sqlAttacks.xml') localDB = database elif what == "bsql": from bsql import process load_definition('bsqlAttacks.xml') localDB = database elif what == "backup": from backup import process localDB = database elif what == "include": from files import process load_definition('filesAttacks.xml') localDB = database elif what == "javascript": from javascript import process localDB = database_js elif what == "crystal": from crystal import process localDB = database elif what == "session": if 'session' in confInfos: attack_list = confInfos['session'] localDB = None from session import process else: raise AtrributeError("You need to give the session id storage key e.g. PHPSESSID, sid etc. ") process(url, localDB, attack_list) # look at teh cookies returned for index, cookie in enumerate(cj): print '[Cookie]\t', index, '\t:\t', cookie cj.save(COOKIEFILE) # put a link def active_link(s): pos = s.find('http://') if pos < 1: return s else: print pos, len(s), s[pos:len(s)] url = s[pos:len(s)] newStr = s[0:pos-1] + "" + urllib.unquote(url) + "" return newStr return s def createStructure(): try: os.mkdir("results") except OSError,e : a=0 try: os.mkdir("local") except OSError,e : a=0 try: os.mkdir("local/js") except OSError,e : a=0 try: os.mkdir("local/css") except OSError,e : a=0 if __name__ == '__main__': option_url = "" option_sql = False option_bsql = False option_xss = False option_backup = False option_include = False option_spider = False option_js = False option_crystal = False option_session = False if len(sys.argv) > 1: parser = OptionParser() parser.add_option("-u", "--url", dest="archives_url", help="Adress to investigate") parser.add_option("-s", "--sql", dest="sql", action="store_true",default=False, help="Look for the SQL Injection") parser.add_option("-x", "--xss", dest="xss", action="store_true",default=False, help="Perform XSS attacks") parser.add_option("-b", "--bsql", dest="bsql", action="store_true",default=False, help="Look for blind SQL Injection") parser.add_option("-z", "--backup", dest="backup", action="store_true",default=False, help="Look for backup files") parser.add_option("-d", "--spider", dest="spider", help="Look for every files") parser.add_option("-i", "--include", dest="include", action="store_true",default=False, help="Perform File Insertion attacks") parser.add_option("-j", "--javascript", dest="javascript", action="store_true",default=False, help="Test the javascript code ?") parser.add_option("-c", "--crystal", dest="crystal", action="store_true",default=False, help="Simple crystal ball test.") parser.add_option("-e", "--session", dest="session", action="store_true",default=False, help="Session evaluations") (options, args) = parser.parse_args() option_url = options.archives_url option_sql = options.sql option_bsql = options.bsql option_xss = options.xss option_backup = options.backup option_include = options.include option_spider = options.spider option_js = options.javascript option_crystal = options.crystal option_session = options.session else: try: f = open("grabber.conf.xml", 'r') except IOError: print "No arguments ? You need to setup the XML configuration file or using the inline arguments" print "Look at the doc to start..." sys.exit(1) parser = make_parser() conf_handler = ConfHandler() # Tell the parser to use our handler parser.setContentHandler(conf_handler) parser.parse("grabber.conf.xml") option_url = confUrl option_spider = confSpider option_sql = "sql" in confActions option_bsql = "bsql" in confActions option_xss = "xss" in confActions option_backup = "backup" in confActions option_include= "include" in confActions option_js = "javascript" in confActions option_crystal= "crystal" in confActions option_session= "session" in confActions # default to localhost ? archives_url = "http://localhost" if option_url: archives_url = option_url root = archives_url createStructure() depth = 1 try: depth = int(option_spider.strip().split()[0]) except (ValueError, IndexError,AttributeError): depth = 0 try: try: spider(archives_url, depth) except IOError,e : print "Cannot open the url = %s" % archives_url print e.strerror sys.exit(1) if len(database.keys()) < 1: print "No information found!" sys.exit(1) else: print "Start investigation..." if option_sql: investigate(archives_url, "sql") if option_xss: investigate(archives_url) if option_bsql: investigate(archives_url,"bsql") if option_backup: investigate(archives_url, "backup") if option_include: investigate(archives_url, "include") if option_js: investigate(archives_url, "javascript") if option_crystal: investigate(archives_url, "crystal") if option_session: investigate(archives_url, "session") except KeyboardInterrupt: print "Plouf!"