#!/usr/bin/env python

# This parses a Squid logfile and outputs a list of traffic by clients and
# then Web site visited, with Kbytes per site, user and total.
#
# NOTE THAT THERE ARE PRIVACY ISSUES IN ANALYSING WEB PROXY LOGFILES; THIS
# IS INTENDED AS A DEMONSTRATION ONLY.
#
# usage: cat [logfile] | ./squid_users.py > [outfile]
#
# Output layout:
#   - one "TOTAL KBYTES" line for all traffic seen,
#   - then one section per client, largest total first,
#   - and within each section one line per visited host, largest first.
#
# The print calls below always pass a single pre-formatted string, so the
# script produces the same output under both Python 2 and Python 3.

import sys

from weblog import squid, url, resolve

# Parser chain: each stage wraps the previous one, and the outermost object
# is the one that gets polled for entries.
o_log = squid.AccessParser(sys.stdin)
p_log = url.Parser(o_log)
log = resolve.SimpleResolver(p_log)
# NOTE(review): presumably makes the resolver report clients by host name
# rather than by address -- confirm against the weblog documentation.
log.set_client = 'host'

# users maps client -> {'TTL_B': bytes for that client,
#                       'HOSTS': {url_host: bytes for that host}}
users = {}
ttl_bytes = 0

# getlogent() is polled until it returns a false value; after each
# successful call the current entry's fields are read off `log` itself.
while log.getlogent():
    client = log.client
    nbytes = log.bytes
    ttl_bytes += nbytes

    record = users.get(client)
    if record is None:
        # First entry seen for this client: start an empty record.
        record = users[client] = {'HOSTS': {}, 'TTL_B': 0}
    record['TTL_B'] += nbytes

    per_host = record['HOSTS']
    per_host[log.url_host] = per_host.get(log.url_host, 0) + nbytes

print("TOTAL KBYTES: %s\n" % int(ttl_bytes / 1024.0))

# Clients ordered by descending byte total. sorted() is stable even with
# reverse=True, so clients with equal totals keep the dict's iteration order.
names = sorted(users, key=lambda name: users[name]['TTL_B'], reverse=True)
for name in names:
    print("\n%s - %s Kb" % (name, int(users[name]['TTL_B'] / 1024.0)))
    host_list = users[name]['HOSTS']
    # Hosts for this client, heaviest first.
    hosts = sorted(host_list, key=host_list.get, reverse=True)
    for host in hosts:
        print(" %6i %s" % (int(host_list[host] / 1024.0), host))