#!/usr/bin/env python
"""Print an HTML table of per-list post counts for each sender address.

Every ``<name>.mbox/<name>.mbox`` file below ``mboxdir`` is scanned for
``From:`` header lines; the script counts how often each address appears in
each file and prints the counts as one table (rows: addresses, columns:
lists).  The code runs unchanged on Python 2.6+ and Python 3.
"""
import os
import re
from operator import itemgetter

mboxdir = '/usr/local/mailman/archives/private'

# ``From: <anything><user@host.tld>`` at the start of a line; group 2 is the
# address.  Compiled as a bytes pattern because the archives are read in
# binary mode (see getemail).
_FROM_HEADER = re.compile(
    br'^From: (.*)\<([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})\>$')


def getfiles(directory=None):
    """Return the entries of the archive directory whose name ends in ``.mbox``.

    directory -- directory to list; defaults to the module-level ``mboxdir``.

    The names are returned in ``os.listdir`` order.  Any error raised by
    ``os.listdir`` (for example a missing directory) propagates to the caller.
    """
    if directory is None:
        directory = mboxdir
    return [name for name in os.listdir(directory) if name.endswith('.mbox')]


def getemail(file, directory=None):
    """Return every address found in ``From:`` headers of one mbox archive.

    file      -- archive name as returned by getfiles(); the file that is
                 read is ``<directory>/<file>/<file>``.
    directory -- archive root; defaults to the module-level ``mboxdir``.

    Addresses are returned in file order, duplicates included.  An archive
    that cannot be opened yields an empty list instead of an error.
    """
    if directory is None:
        directory = mboxdir
    path = os.path.join(directory, file, file)

    emails = []
    try:
        # Binary mode: archives may contain bytes that are not valid in the
        # locale encoding, which would abort a text-mode read on Python 3.
        handle = open(path, 'rb')
    except IOError:
        return emails

    with handle:
        for line in handle:
            match = _FROM_HEADER.match(line)
            if not match:
                continue
            address = match.group(2)
            # On Python 3 the match is ``bytes``; the pattern only admits
            # ASCII characters, so decoding cannot fail.  On Python 2 the
            # match already is a native ``str``.
            if not isinstance(address, str):
                address = address.decode('ascii')
            emails.append(address)
    return emails


def getkeys(dict):
    """Return the distinct inner keys of a two-level mapping.

    dict -- mapping ``address -> {list name -> count}`` (the parameter name
            shadows the builtin; it is kept so existing callers keep working).

    Each list name appears once, in the order it is first encountered.
    """
    seen = set()
    lists = []
    for per_list in dict.values():
        for name in per_list:
            if name not in seen:
                seen.add(name)
                lists.append(name)
    return lists


def _escape(text):
    """Return ``text`` with the HTML special characters ``& < > "`` escaped."""
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))


def parsehtml(all):
    """Render the count mapping as an HTML table and return it as a string.

    all -- mapping ``address -> {list name -> count}`` (the parameter name
           shadows the builtin; it is kept so existing callers keep working).

    The header row holds the sorted list names with every ``.mbox`` removed;
    one row per address follows, sorted by address, with one cell per list.
    A missing or falsy count is rendered as ``0`` so that every row has the
    same number of cells.
    """
    # NOTE(review): the table markup below is a reconstruction -- the copy of
    # this file that was reviewed had the tags inside these string literals
    # stripped.  Confirm the exact tags against the deployed version.
    keys = sorted(getkeys(all))

    parts = ["<table>", "<tr><th></th>"]
    for name in keys:
        parts.append("<th>" + _escape(name.replace(".mbox", "")) + "</th>\n")
    parts.append("</tr>\n")

    for email in sorted(all):
        counts = all[email]
        parts.append("<tr><td>" + _escape(email) + "</td>\n")
        for name in keys:
            parts.append(" <td>" + str(counts.get(name) or 0) + "</td>\n")
        parts.append("</tr>\n")

    parts.append("</table>")
    return "".join(parts)


def main():
    """Count sender addresses in every archive and print the HTML table."""
    counts = {}
    for mbox in getfiles():
        for email in getemail(mbox):
            per_list = counts.setdefault(email, {})
            per_list[mbox] = per_list.get(mbox, 0) + 1
    # Single-argument call form: valid as a statement on Python 2 and as a
    # function call on Python 3.
    print(parsehtml(counts))


if __name__ == '__main__':
    main()