# apachelog.py # # Parse an apache log file into a sequence of dictionaries from fieldmap import * import re logpats = r'(\S+) (\S+) (\S+) \[(.*?)\] ' \ r'"(\S+) (\S+) (\S+)" (\S+) (\S+)' logpat = re.compile(logpats) def apache_log(lines): groups = (logpat.match(line) for line in lines) tuples = (g.groups() for g in groups if g) colnames = ('host','referrer','user','datetime', 'method', 'request','proto','status','bytes') log = (dict(zip(colnames,t)) for t in tuples) log = field_map(log,"status",int) log = field_map(log,"bytes", lambda s: int(s) if s != '-' else 0) return log # Example use: if __name__ == '__main__': from linesdir import * lines = lines_from_dir("access-log*","www") log = apache_log(lines) for r in log: print r