# WSMap (Web Services Mapper) v0.10
# (C)2005 Information Security Partners, LLC
# Written by Alex Stamos and Jesse Burns
# Questions, comments, and suggestions go to alex@isecpartners.com
"""
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
"""

import io
import os
import sys
from optparse import OptionParser

try:
    # Python 2 module name
    import urlparse
except ImportError:
    # Python 3 moved the same functions into urllib.parse
    import urllib.parse as urlparse

try:
    import pycurl
except ImportError:
    # Keep the URL helpers and --help usable without pycurl; get_curl()
    # and main() report the missing dependency when it is actually needed.
    pycurl = None

# one of these strings must be present for a page to be judged a WSDL document
wsdl_substrings = ['wsdl:', 'targetNamespace', 'xmlns:']

# these strings must be present for a page to be judged a DISCO document
disco_substrings = ['disco:discovery']

# file extensions (with the leading dot) of URLs that are never worth probing
_CRAP_EXTENSIONS = ('.gif', '.jpg', '.ico', '.png', '.bmp',
                    '.js', '.css', '.pdf', '.ra')


def findFilesNamed(name, showProgress=False):
    """Return the paths of all files called `name` under the current directory.

    The search walks the tree rooted at '.'.  When `showProgress` is true a
    dot is written to stdout for every match, followed by a final newline.
    """
    logFiles = []
    if showProgress:
        sys.stdout.write('Walking file system. ')
    for root, dirs, files in os.walk('.'):
        for f in files:
            if f == name:
                logFiles.append(os.path.join(root, f))
                if showProgress:
                    sys.stdout.write('. ')
    if showProgress:
        print("")
    return logFiles


def getUrlLines(filename, requiredSubstring, showNames=False):
    """Return the URLs from every line of `filename` that starts with "URL:".

    The first five characters of a matching line (the "URL:" tag plus one
    separator character) are dropped and trailing whitespace is stripped.
    If `requiredSubstring` is not None, only lines containing it are kept.
    When `showNames` is true, the file name and match count are printed.
    """
    urlLines = []
    # 'with' guarantees the log file is closed even if reading fails
    with open(filename, 'r') as logFile:
        for line in logFile:
            if not line.startswith("URL:"):
                continue
            if requiredSubstring is None or requiredSubstring in line:
                urlLines.append(line[5:].rstrip())
    if showNames:
        print("Source %s with %d lines" % (filename, len(urlLines)))
    return urlLines


def stripParameters(url):
    """Return `url` cut at the first '?' or ';', i.e. without GET parameters."""
    cuts = [i for i in (url.find('?'), url.find(';')) if i != -1]
    if not cuts:
        return url
    return url[:min(cuts)]


def isCrap(url):
    """Return True if `url` ends in the extension of a static file type.

    The comparison is case-insensitive and includes the dot, so a path such
    as '/camera' is not mistaken for a '.ra' file.  Callers should strip GET
    parameters first (see stripParameters).
    """
    return url.lower().endswith(_CRAP_EXTENSIONS)


def findPath(url):
    """Parse `url` and return its path component."""
    return urlparse.urlparse(url)[2]


def findHost(url):
    """Parse `url` and return its host name without user info or port.

    The host is lower-cased by the URL parser; an empty string is returned
    when the URL has no network location.
    """
    return urlparse.urlparse(url).hostname or ''


def findDirectory(url):
    """Return the directory part of the path of `url`, ending in '/'.

    Backslashes are treated as path separators.  A URL with no directory
    (for example 'http://host') yields '/', so that the result can always
    be appended directly to a host name.
    """
    path = urlparse.urlparse(url)[2].replace('\\', '/')
    if '?' in path:
        path = path[:path.find('?')]
    return path[:path.rfind('/') + 1] or '/'


def getDirectories(urls):
    """Return the directory (see findDirectory) of every URL in `urls`."""
    return [findDirectory(url) for url in urls]


def getDirectoriesByHost(urls):
    """Return a dict mapping each host in `urls` to a list of its directories.

    Directories appear in the order their URLs were given and may repeat.
    """
    dirsByHost = {}
    for url in urls:
        dirsByHost.setdefault(findHost(url), []).append(findDirectory(url))
    return dirsByHost


# Try to find any implied directories
def findImpliedDirs(dirs):
    """For each directory return that directory and every parent directory.

    '/a/b/' contributes '/a/b/', '/a/' and '/'.  The result keeps input
    order and is not de-duplicated.
    """
    result = []
    for directory in dirs:
        result.append(directory)
        current = directory
        while current.rfind('/') > 0:
            # drop the trailing component, keeping the slash before it
            trimmed = current[:current.rfind('/')]
            current = current[:trimmed.rfind('/') + 1]
            result.append(current)
    return result


def unique(alist):
    """Remove adjacent duplicates from `alist` in place; returns None.

    On a sorted list this leaves exactly one copy of each element.
    """
    if len(alist) < 2:
        return
    kept = [alist[0]]
    for item in alist[1:]:
        if item != kept[-1]:
            kept.append(item)
    alist[:] = kept


def get_curl():
    """Return a (curl handle, output buffer) pair configured for probing.

    The response body is collected, as bytes, in the returned buffer.
    Raises ImportError if pycurl is not installed.
    """
    if pycurl is None:
        raise ImportError("pycurl is required to fetch URLs")
    curl = pycurl.Curl()
    output = io.BytesIO()
    curl.setopt(pycurl.WRITEFUNCTION, output.write)
    # no verify for SSL: the tool probes arbitrary hosts found in proxy logs
    curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl.setopt(pycurl.TIMEOUT, 30)
    return (curl, output)


def _toBytes(text):
    """Return `text` as bytes so it can be searched for in a response body."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def _fetch(url):
    """Download `url`; return the body as bytes, or None if the transfer failed."""
    (curl, output) = get_curl()
    try:
        curl.setopt(pycurl.URL, url)
        curl.perform()
    except pycurl.error:
        # unreachable host, timeout, TLS failure, ...: treat as "no document"
        return None
    finally:
        curl.close()
    return output.getvalue()


def _saveBody(filename, body):
    """Write the downloaded bytes `body` to `filename`."""
    with open(filename, 'wb') as out:
        out.write(body)


def returns_wsdl(url, write_file, filename):
    """Return True if fetching `url` yields something that looks like a WSDL.

    A page qualifies when it contains at least one of `wsdl_substrings`.
    If `write_file` is true, the content of a matching page is saved in
    `filename`.  Transfer errors result in False.
    """
    body = _fetch(url)
    if body is None:
        return False
    for marker in wsdl_substrings:
        if _toBytes(marker) in body:
            if write_file:
                _saveBody(filename, body)
            return True
    return False


def returns_disco(url, write_file, filename):
    """Return True if fetching `url` yields something that looks like a DISCO.

    A page qualifies when it contains every one of `disco_substrings`.
    If `write_file` is true, the content of a matching page is saved in
    `filename`.  Transfer errors result in False.
    """
    if not disco_substrings:
        # nothing to match against: never report a hit
        return False
    body = _fetch(url)
    if body is None:
        return False
    for marker in disco_substrings:
        if _toBytes(marker) not in body:
            return False
    if write_file:
        _saveBody(filename, body)
    return True


def _testUrls(urls, suffix, check, label, extension, counter, options):
    """Probe every URL in `urls` with `suffix` appended and report the hits.

    `check` is returns_wsdl or returns_disco, `label` the document type used
    in messages and `extension` the suffix of saved files.  `counter` numbers
    the saved files; the updated counter is returned.
    """
    for url in urls:
        if options.verbose:
            sys.stdout.write(url + ' ')
        target = url + suffix
        filename = str(counter) + extension
        if check(target, options.writeFiles, filename):
            print("Found " + label + ": " + target)
            if options.writeFiles:
                print("\t- storing in file: " + filename)
                counter += 1
        elif options.verbose:
            print("None")
    return counter


def main():
    """Command-line entry point: parse options, read logs and probe URLs."""
    allLines = []
    wsdlCtr = 1
    discoCtr = 1
    print("""
Web Services Mapper (WSMap) v0.10
(C)2005 Information Security Partners, LLC
Written by Jesse Burns and Alex Stamos
http://www.isecpartners.com/tools.html
""")

    # Create OptionParser object
    parser = OptionParser(version="%prog v.10")

    # Create options
    parser.add_option("-W", "--WSDL", action="store_true", dest="testWSDL",
                      help="perform ?WSDL check on URLs found in WebScarab logs")
    parser.add_option("-D", "--DISCO", action="store_true", dest="testDISCO",
                      help="perform ?DISCO check on URLs found in WebScarab logs")
    parser.add_option("-w", "--write", action="store_true", dest="writeFiles",
                      help="store found WSDL or DISCO files in current directory")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="provides verbose output including a list of tested urls")
    parser.add_option("-l", "--list", action="store_true", dest="list",
                      help="lists the source Web Scarab log files being processed")
    parser.add_option("-r", "--require", action="store", dest="required_content",
                      help="requires this string to be in every scanned url. Commonly used with target domains.")

    # Parse options
    (options, args) = parser.parse_args()

    if options.testWSDL:
        print("WSDL Discovery Enabled")
    if options.testDISCO:
        print("DISCO Discovery Enabled")
    if not options.testWSDL and not options.testDISCO:
        print("No action selected! You must specify at least one action: -W, -D.")
        print("For help use -h or --help.")
        # nothing to do, so do not waste time walking the file system
        return

    if pycurl is None:
        sys.stderr.write("The pycurl module is required to test URLs.\n")
        return

    # get a list of conversation logs, and the url lines from those logs
    for logName in findFilesNamed('conversationlog', options.list):
        allLines.extend(getUrlLines(logName, options.required_content,
                                    options.list))

    if options.testWSDL:
        candidates = set()   # every non-static URL, tested with ?WSDL
        wsdlLooking = []     # URLs whose path mentions wsdl, in log order
        alreadyListed = set()
        for url in allLines:
            url = stripParameters(url)
            if isCrap(url):
                continue
            candidates.add(url)
            if 'wsdl' in findPath(url).lower() and url not in alreadyListed:
                alreadyListed.add(url)
                wsdlLooking.append(url)

        print("\nTesting files that looked like WSDLs...")
        wsdlCtr = _testUrls(wsdlLooking, '', returns_wsdl, 'WSDL', '.wsdl',
                            wsdlCtr, options)

        print("\nTesting URLs with ?WSDL...")
        wsdlCtr = _testUrls(sorted(candidates), '?WSDL', returns_wsdl, 'WSDL',
                            '.wsdl', wsdlCtr, options)

    if options.testDISCO:
        candidates = set()
        seenHosts = set()
        guessedUrls = []
        for url in allLines:
            # strip parameters first so 'logo.gif?x=1' is recognised as static
            stripped = stripParameters(url)
            if not isCrap(stripped):
                candidates.add(stripped)
            host = findHost(url)
            # lines without a host cannot produce a usable guess
            if host and host not in seenHosts:
                seenHosts.add(host)
                guessedUrls.append('http://' + host + '/Default.VSDisco')

        dirsByHost = getDirectoriesByHost(allLines)
        for host in sorted(dirsByHost):
            if not host:
                continue
            impliedDirs = sorted(set(findImpliedDirs(dirsByHost[host])))
            for directory in impliedDirs:
                guessedUrls.append("http://" + host + directory + 'default.disco')

        print("\nTesting URLs with ?DISCO...")
        discoCtr = _testUrls(sorted(candidates), '?DISCO', returns_disco,
                             'DISCO', '.disco', discoCtr, options)

        print("\nTesting guessed Disco URLs...")
        discoCtr = _testUrls(guessedUrls, '', returns_disco, 'DISCO', '.disco',
                             discoCtr, options)


if __name__ == '__main__':
    main()