#!/usr/bin/env python

# This script shows what the most popular referers to pages on your site are.
# Make sure you set log.siteurl and c_log.directory_index as appropriate.
# (this requires a logfile in combined format)
#
# usage: cat [logfile] | ./referers.py [siteurl(s)] > [outfile]

import sys

from weblog import combined, url, clean, referer

# Build the processing chain: each stage is constructed around the previous
# one, and entries are pulled through the outermost stage (`log`).
o_log = combined.Parser(sys.stdin)
p_log = url.Parser(o_log)
c_log = clean.Cleaner(p_log)
c_log.directory_index = ['index.html', 'index.htm']
log = referer.Typer(c_log)
# Every command-line argument is passed along as a site URL.
log.siteurl = sys.argv[1:]

# offsites maps each page URL to a dict of {referer: hit count}.
offsites = {}
while log.getlogent():
    # Compare by value, not identity: `is` against a string literal only
    # matches when both strings happen to be the same interned object.
    if log.referer_type == 'OFFSITE':
        counts = offsites.setdefault(log.url, {})
        counts[log.referer] = counts.get(log.referer, 0) + 1

# Report: pages in sorted order, each followed by its referers, most
# frequent first.
for page in sorted(offsites):
    print("\n%s" % page)
    ref_nums = offsites[page]
    # sorted() is stable even with reverse=True, so referers with equal
    # counts keep the dict's iteration order.
    for ref in sorted(ref_nums, key=ref_nums.get, reverse=True):
        print("%s %s" % (ref_nums[ref], ref))