2008-02-08

Web page smoke test script

#!/usr/bin/env python

"""
This script checks whether the page at each configured URL contains an expected pattern. The configuration file has the following format:

protocol_name|url|additional_args

example:
simpleGET|http://www.google.com/|Google
simpleGET|http://www.aol.com/|WhatWhat

Example output:
http://www.google.com/ -> True
http://www.aol.com/ -> False

"""

import os.path, re, getopt, sys, urllib2, urllib

# When True, say() prints its messages; the __main__ block sets it for -v.
verbose = False
# Keys into each protocol's entry in the protocols dict: FETCHER selects the
# callable that fetches a url, MATCHER the callable that checks the result.
FETCHER = 'fetcher'
MATCHER = 'matcher'

def say(s):
    """Print s when the module-level verbose flag is set; otherwise do nothing."""
    if not verbose:
        return
    print(s)

class UrlChecker:
    """Run every raw url entry through its protocol's fetcher and matcher.

    urls is a sequence of raw entries of the form
    ``protocol_name|url|additional_args``. protocols maps a protocol name to
    a dict holding one callable under the FETCHER key and one under the
    MATCHER key.
    """

    def __init__(self, urls, protocols):
        self.urls = urls
        self.protocols = protocols

    def check(self):
        """Check each raw url entry in order."""
        for rurl in self.urls:
            self._check_url(rurl)

    def _check_url(self, rurl):
        """Fetch one entry's url and pass the result to the protocol's matcher.

        Raises ValueError when rurl does not consist of three '|'-separated
        fields, and KeyError when its protocol name is not in self.protocols.
        """
        # Split at most twice so additional_args may itself contain '|'.
        fields = rurl.split('|', 2)
        if len(fields) != 3:
            raise ValueError(
                "Malformed url entry %r: expected "
                "protocol_name|url|additional_args" % (rurl,))
        protocol, url, args = fields
        say("Check URL %s with protocol %s. Args = %s" % (url, protocol, args))
        handlers = self.protocols[protocol]
        rslt = handlers[FETCHER](url, args)
        handlers[MATCHER](rslt, url, args)

def load_urls(urlfile):
    """Read raw url entries from the file at path urlfile, one per line.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped, because an empty entry cannot be split into
    the protocol, url and argument fields expected downstream.

    Returns the list of non-empty stripped lines in file order.
    Any error raised by open() propagates to the caller.
    """
    urls = []
    f = open(urlfile, 'r')
    try:
        for line in f:
            entry = line.strip()
            if not entry:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            say("Load one raw url %s" % (entry,))
            urls.append(entry)
    finally:
        f.close()
    return urls

def add_simpleGET(protocols):
    """Register the 'simpleGET' protocol in the protocols dict.

    The entry stored under 'simpleGET' maps FETCHER to a function that reads
    the body at a url with urllib2.urlopen, and MATCHER to a function that
    prints whether that body contains the entry's pattern string.
    """

    def simple_get(url, ignore):
        """Return the body read from url; the second argument is unused."""
        r = urllib2.urlopen(url)
        try:
            return r.read()
        finally:
            # Close the response even when read() raises.
            r.close()

    def str_match(body, url, pattern):
        """Print '<url> -> True' or '<url> -> False' for pattern in body."""
        print("%s -> %s" % (url, pattern in body))

    protocols['simpleGET'] = {FETCHER: simple_get, MATCHER: str_match}

def init_protocols():
    """Return a new protocol table with the simpleGET protocol registered."""
    table = {}
    add_simpleGET(table)
    return table

if __name__ == '__main__':
    # Command-line entry point: parse -f <url_file> and -v, load the url
    # entries and check each of them.

    def usage():
        """Print the command-line synopsis."""
        print("Usage:")
        print("python check_url.py -f <url_file> -v")

    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:v")
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Record all options before doing any work. Loading the file while still
    # parsing would make "-f file -v" miss the verbose load messages, because
    # verbose would only be switched on after the file had been read.
    urlfile = None
    for opt, arg in opts:
        if opt == "-v":
            verbose = True
        elif opt == "-f":
            urlfile = arg
    if urlfile is None:
        # -f is mandatory.
        usage()
        sys.exit(1)

    protocols = init_protocols()
    urls = load_urls(urlfile)
    checker = UrlChecker(urls, protocols)
    checker.check()