#!/usr/bin/env python
"""
This script is used to check URLs against a pattern. The configuration file has the following format:
    protocol_name|url|additional_args
example:
    simpleGET|http://www.google.com/|Google
    simpleGET|http://www.aol.com/|WhatWhat
Example output:
    http://www.google.com/ -> True
    http://www.aol.com/ -> False
"""
import os.path, re, getopt, sys, urllib2, urllib
# Keys of each per-protocol dict: FETCHER maps to the callable that retrieves
# a URL, MATCHER to the callable that judges what the fetcher returned.
FETCHER = 'fetcher'
MATCHER = 'matcher'
# Diagnostic output switch read by say(); the -v command-line option turns it on.
verbose = False
def say(s):
    """Print ``s`` to standard output when verbose mode is on.

    The module-level ``verbose`` flag is looked up on every call, so the
    function starts printing as soon as the flag is switched to True.
    """
    if not verbose:
        return
    # A single parenthesised argument prints exactly like ``print s``.
    print(s)
class UrlChecker:
    """Run every configured URL entry through its protocol's fetcher and matcher.

    ``urls`` is an iterable of raw entries of the form
    ``protocol_name|url|additional_args``.  ``protocols`` maps a protocol
    name to a dict holding a FETCHER callable ``(url, args)`` and a MATCHER
    callable ``(result, url, args)``.
    """

    def __init__(self, urls, protocols):
        self.urls = urls
        self.protocols = protocols

    def check(self):
        """Check each raw entry in ``self.urls`` in order."""
        for rurl in self.urls:
            self._check_url(rurl)

    def _parse_entry(self, rurl):
        """Split a raw entry into a ``(protocol, url, args)`` tuple.

        Only the first two ``|`` characters act as separators, so the
        additional-args field may itself contain ``|``.

        Raises:
            ValueError: if the entry has fewer than three fields.
        """
        fields = rurl.split('|', 2)
        if len(fields) != 3:
            raise ValueError(
                "Malformed url entry %r; expected "
                "protocol_name|url|additional_args" % (rurl,))
        return tuple(fields)

    def _check_url(self, rurl):
        """Fetch one entry's URL and hand the result to the protocol's matcher.

        Raises:
            ValueError: if ``rurl`` is malformed (see ``_parse_entry``).
            KeyError: if the entry names a protocol missing from
                ``self.protocols``.
        """
        protocol, url, args = self._parse_entry(rurl)
        say("Check URL %s with protocol %s. Args = %s" % (url, protocol, args))
        handlers = self.protocols[protocol]
        rslt = handlers[FETCHER](url, args)
        handlers[MATCHER](rslt, url, args)
def load_urls(urlfile):
    """Read the raw URL entries from the configuration file ``urlfile``.

    Each non-blank line is stripped of surrounding whitespace and returned
    as one entry, in file order.  Blank and whitespace-only lines are
    skipped: they carry no ``protocol|url|args`` fields and would otherwise
    fail when the entry is later split.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    urls = []
    f = open(urlfile, 'r')
    try:
        for line in f:
            entry = line.strip()
            if not entry:
                continue
            say("Load one raw url %s" % (entry))
            urls.append(entry)
    finally:
        # Always release the file handle, even if reading fails part-way.
        f.close()
    return urls
def add_simpleGET(protocols):
    """Register the ``simpleGET`` protocol in the ``protocols`` dict.

    The protocol fetches a URL with ``urllib2.urlopen`` and reports whether
    the entry's additional-args field occurs as a substring of the response
    body.  ``protocols`` is modified in place; nothing is returned.
    """
    def simple_get(url, ignore):
        # ``ignore`` receives the entry's additional-args field, which only
        # the matcher needs.
        r = urllib2.urlopen(url)
        try:
            return r.read()
        finally:
            # Close the response even when read() raises, so the
            # connection is not leaked.
            r.close()

    def str_match(body, url, pattern):
        # Prints "<url> -> True" when the pattern is found, else "-> False".
        print("%s -> %s" % (url, body.find(pattern) != -1))

    protocols['simpleGET'] = {FETCHER: simple_get, MATCHER: str_match}
def init_protocols():
    """Build and return the protocol table.

    Starts from an empty dict and lets ``add_simpleGET`` register its entry
    in it.
    """
    registry = dict()
    add_simpleGET(registry)
    return registry
if __name__ == '__main__':
    # Command-line entry point: parse -f <url_file> and -v, then check every
    # entry of the url file.  Exits with status 2 on a bad option and 1 when
    # no url file was given.
    def usage():
        """Print the command-line synopsis."""
        print("Usage:")
        print("python check_url.py -f <url_file> -v")

    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:v")
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Consume every option before loading anything, so that -v takes effect
    # for the load messages no matter where it appears relative to -f.
    urlfile = None
    for opt, arg in opts:
        # Compare with ==; ``opt in ("-v")`` is a substring test on a plain
        # string, not a tuple membership test.
        if opt == "-v":
            # Module-level assignment: this is the flag say() reads.
            verbose = True
        elif opt == "-f":
            urlfile = arg
    if urlfile is None:
        usage()
        sys.exit(1)

    urls = load_urls(urlfile)
    protocols = init_protocols()
    checker = UrlChecker(urls, protocols)
    checker.check()
# 2008-02-08
# Web page smoke test script
# Subscribe to:
# Comments (Atom)