#!/usr/bin/env python
"""
This script checks whether the page at each URL contains a pattern. The configuration file has the following format:
protocol_name|url|additional_args
example:
simpleGET|http://www.google.com/|Google
simpleGET|http://www.aol.com/|WhatWhat
Example output:
http://www.google.com/ -> True
http://www.aol.com/ -> False
"""
import os.path, re, getopt, sys, urllib2, urllib
# When True, say() prints its progress messages; the -v command-line flag
# switches it on.
verbose = False
# Keys of each protocol entry: FETCHER maps to the callable that retrieves a
# URL, MATCHER to the callable that checks the fetched result.
FETCHER = 'fetcher'
MATCHER = 'matcher'
def say(s):
    """Print ``s`` only when the module-level ``verbose`` flag is set."""
    if not verbose:
        return
    print(s)
class UrlChecker:
    """Run every configured URL check through its protocol's handlers.

    ``urls`` is an iterable of raw ``protocol|url|additional_args`` lines and
    ``protocols`` maps a protocol name to a dict holding a FETCHER callable
    and a MATCHER callable.
    """

    def __init__(self, urls, protocols):
        self.urls = urls
        self.protocols = protocols

    def check(self):
        """Check each raw URL line in turn."""
        for rurl in self.urls:
            self._check_url(rurl)

    def _check_url(self, rurl):
        """Fetch one URL and pass the result to the protocol's matcher.

        Raises ValueError when ``rurl`` has fewer than three '|'-separated
        fields and KeyError when the protocol name is not registered.
        """
        # Split on the first two separators only, so the additional
        # arguments (e.g. the pattern to look for) may contain '|'.
        protocol, url, args = rurl.split('|', 2)
        say("Check URL %s with protocol %s. Args = %s" % (url, protocol, args))
        handlers = self.protocols[protocol]
        rslt = handlers[FETCHER](url, args)
        handlers[MATCHER](rslt, url, args)
def load_urls(urlfile):
    """Read the raw check lines from the file named ``urlfile``.

    Every line is stripped of surrounding whitespace. Blank lines are
    skipped, because an empty entry cannot be split into the
    ``protocol|url|additional_args`` fields expected by the checker.

    Returns the list of non-empty stripped lines, in file order.
    """
    urls = []
    f = open(urlfile, 'r')
    try:
        for line in f:
            raw = line.strip()
            if not raw:
                continue
            say("Load one raw url %s" % (raw,))
            urls.append(raw)
    finally:
        f.close()
    return urls
def add_simpleGET(protocols):
    """Register the 'simpleGET' protocol in the ``protocols`` dict.

    The fetcher downloads the body of a URL with a plain GET request; the
    matcher prints ``<url> -> True/False`` depending on whether the pattern
    occurs in that body.
    """
    def simple_get(url, ignore):
        # The second argument is unused here; fetchers are called as
        # fetcher(url, args).
        r = urllib2.urlopen(url)
        try:
            return r.read()
        finally:
            # Close the response even when read() fails part-way.
            r.close()

    def str_match(body, url, pattern):
        print("%s -> %s" % (url, pattern in body))

    protocols['simpleGET'] = {FETCHER: simple_get, MATCHER: str_match}
def init_protocols():
    """Return a new protocol table with the simpleGET protocol registered."""
    table = {}
    add_simpleGET(table)
    return table
if __name__ == '__main__':
    # Script entry point: parse -f <url_file> and -v, load the URL file and
    # run every check. Exits with status 2 on bad options and 1 when no URL
    # file was given.
    def usage():
        print("Usage:")
        print("python check_url.py -f <url_file> -v")

    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:v")
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    urlfile = None
    for opt, arg in opts:
        # Compare with ==: a parenthesised string such as ("-v") is not a
        # tuple, so an `in` test against it would be a substring match.
        if opt == "-v":
            verbose = True
        elif opt == "-f":
            urlfile = arg

    if urlfile is None:
        usage()
        sys.exit(1)

    # Load the file only after all options are processed, so that -v takes
    # effect on the load messages regardless of the order of -f and -v.
    urls = load_urls(urlfile)
    protocols = init_protocols()
    checker = UrlChecker(urls, protocols)
    checker.check()
# 2008-02-08
# Web page smoke test script
# Subscribe to:
# Posts (Atom)