#!/usr/bin/env python
"""
    Session Analyzer Module for Grabber v0.1
    Copyright (C) 2006 - Romain Gaucher - http://rgaucher.info
"""
import sys
import re
import time
import datetime
import calendar

from grabber import getContentDirectURL_GET

# Maps each harvested session id to the value of the response's Date header
# (seconds since the Unix epoch, as a float), or to '' when the response
# carried no usable Date header.  The dict is never cleared by this module.
sessions = {}

# strptime layout of an HTTP date once the "GMT" suffix has been removed.
_HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S"


def normalize_whitespace(text):
    """Collapse every run of whitespace in text to one space and trim the ends."""
    return ' '.join(text.split())


def getDirectSessionID(currentURL, sid):
    """Return the tail of currentURL located 10 characters after sid starts.

    Returns None when sid does not occur in currentURL, or when it occurs
    at the very beginning of it (offset 0).
    """
    k = currentURL.find(sid)
    if k > 0:
        # NOTE(review): the fixed offset of 10 presumably skips a 9-character
        # name such as "PHPSESSID" plus the "=" sign; for a name of any other
        # length the slice will not start at the value - confirm with callers.
        return currentURL[k+10:]
    return None


def stripNoneASCII(output):
    """Return output with every character whose code is 128 or above removed."""
    # join() over a filtered list avoids the quadratic cost of repeated "+=".
    return ''.join([c for c in output if ord(c) < 128])


# Matches a single "Date: ..." header line, case-insensitively.
regDate = re.compile(r'^Date: (.*)$', re.I)


def _parseHTTPDate(value):
    """Convert a Date header value to seconds since the epoch, or None.

    None is returned when the value does not follow _HTTP_DATE_FORMAT.
    """
    cleaned = normalize_whitespace(value.replace('GMT', ''))
    try:
        parsed = time.strptime(cleaned, _HTTP_DATE_FORMAT)
    except ValueError:
        return None
    # The header is expressed in GMT, so it must be converted with timegm();
    # mktime() would interpret it as local time.  float() keeps the textual
    # form ("123.0") that the mktime-based computation used to produce.
    return float(calendar.timegm(parsed))


def lookAtSessionID(url, sidName, regSession):
    """Request url once and record the session id found in the response body.

    url        -- address handed to getContentDirectURL_GET
    sidName    -- name of the session parameter to look for in the body
    regSession -- compiled pattern whose second group captures the session id

    When the body matches, sessions[id] is set to the timestamp taken from
    the response's Date header, or to '' when no Date header could be
    parsed.  Nothing is recorded when no handle is returned for the url.
    """
    handle = getContentDirectURL_GET(url, "")
    if handle is None:
        return
    output = handle.read()

    # Stays None unless a parseable Date header is found below.
    gi = None
    for h in str(handle.info()).split('\n'):
        found = regDate.match(h)
        if found:
            stamp = _parseHTTPDate(found.group(1))
            if stamp is not None:
                gi = stamp

    # Flatten the body onto one line so that "." in regSession can span it.
    output = output.replace('\n', '').replace('\t', '')
    output = stripNoneASCII(output)

    if sidName in output:
        found = regSession.match(output)
        if found:
            ssn = found.group(2)
            if ssn is not None:
                if gi is not None:
                    sessions[ssn] = gi
                else:
                    sessions[ssn] = ''


def process(url, database, sidName, attempts=1000):
    """Collect session ids issued for url and dump them to a text file.

    url      -- address to request repeatedly
    database -- not used by this module
    sidName  -- name of the session parameter, matched literally
    attempts -- number of times lookAtSessionID is called (default 1000)

    Every entry of the module-level sessions dict is then written to
    'results/sessions.txt' as one "id, timestamp" line; the file is
    overwritten.
    """
    # sidName is escaped so that any regex metacharacter it contains is
    # matched literally, like the substring test in lookAtSessionID.
    regString = "(.*)" + re.escape(sidName) + "=([a-zA-Z0-9]+)(.*)"
    regSession = re.compile(regString, re.I)
    print("%s %s %s" % (url, sidName, regString))
    for _ in range(attempts):
        lookAtSessionID(url, sidName, regSession)
    # The context manager closes the file even if a write fails.
    with open('results/sessions.txt', 'w') as o:
        for s in sessions:
            o.write("%s, %s\n" % (s, sessions[s]))