# encoding: utf-8
import re
import sys
from datetime import datetime


class GenericParser(object):
    """Iterate over the log entries found in a line-oriented log stream.

    Subclasses must provide:

    * ``RE_LOG_ENTRY`` -- regular expression matching the first line of an
      entry; it must define a ``timestamp`` named group.
    * ``TIMESTAMP_FORMAT`` -- ``datetime.strptime`` format for that group.
    * ``is_serious(log_entry)`` -- predicate used when ``errors_only`` is set.

    Lines that do not match ``RE_LOG_ENTRY`` are treated as continuation
    lines (for example a stack trace) of the most recent entry.

    Iterating yields tuples of
    ``(timestamp, log_entry, stack_trace, byte_count, line_count,
    event_count)`` where ``log_entry`` is the ``groupdict()`` of the header
    match, ``stack_trace`` is the text of the continuation lines, and the
    three counters are running totals at the moment the row is produced.
    """

    def __init__(self, fh, errors_only=False):
        # fh may be sys.stdin, the result of gzip.open()/open(), or any other
        # file-handle-like object that yields lines when iterated.
        self.fh = fh
        self.errors_only = errors_only

    def _keep(self, log_entry):
        """Return True when *log_entry* passes the ``errors_only`` filter."""
        return not self.errors_only or self.is_serious(log_entry)

    def _build_row(self, log_entry, message_lines, byte_count, line_count,
                   event_count):
        """Assemble the tuple yielded for one completed log entry."""
        # Everything after the header line's newline is the continuation text.
        stack_trace = "".join(message_lines).partition("\n")[2]
        timestamp = datetime.strptime(
            log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return (timestamp, log_entry, stack_trace,
                byte_count, line_count, event_count)

    def __iter__(self):
        entry_pattern = re.compile(self.RE_LOG_ENTRY)
        message_lines = []   # header line + continuation lines of the pending entry
        log_entry = None     # groupdict() of the pending entry's header line
        byte_count = 0
        line_count = 0
        event_count = 0      # counts only the entries that are actually yielded

        for line in self.fh:
            # len() is bytes for byte strings and characters for text strings.
            byte_count += len(line)
            line_count += 1

            if not line.strip():
                # Skip empty lines.
                continue

            m = entry_pattern.match(line)
            if not m:
                message_lines.append(line)
                continue

            # A new header line completes the pending entry.  Note that the
            # counters already include this new header line.
            if log_entry and self._keep(log_entry):
                event_count += 1
                # The yield is what makes this method a generator/iterator.
                yield self._build_row(log_entry, message_lines, byte_count,
                                      line_count, event_count)

            # Always start tracking the new entry, even when the previous one
            # was filtered out; otherwise the filtered entry would stay
            # pending forever and every later entry would be lost.
            message_lines = [line]
            log_entry = m.groupdict()

        # End of input: emit the last pending entry, which has no following
        # header line to trigger it.
        if log_entry and self._keep(log_entry):
            event_count += 1
            yield self._build_row(log_entry, message_lines, byte_count,
                                  line_count, event_count)


class JavaLogParser(GenericParser):
    """Parser for Java application logs with ``<ts> <LEVEL> 1 --- [..]`` headers."""

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
    # NOTE(review): only the "timestamp" and "severity" group names are
    # required by the code in this file; "thread", "logger" and "message" are
    # reconstructed names -- confirm against callers that read log_entry.
    RE_LOG_ENTRY = (
        r'(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- '
        r'\[(?P<thread>.+)\](?P<logger>.+) +: (?P<message>.+)'
    )

    def is_serious(self, log_entry):
        """Return True for entries logged at ERROR severity."""
        return log_entry.get("severity") == "ERROR"


class ApacheLogParser(GenericParser):
    """Parser for Apache access logs in the combined log format."""

    # NOTE(review): only the "timestamp" and "status" group names are required
    # by the code in this file; the remaining names are reconstructed --
    # confirm against callers that read log_entry.
    RE_LOG_ENTRY = (
        r'(?P<ip>.+?) - (?P<user>.+?) \[(?P<timestamp>.+?) \+\d\d\d\d\] '
        r'"(?P<method>[A-Z]+) (?P<path>.+) HTTP/1.[01]" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<agent>.+?)"'
    )
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True for responses with an HTTP status of 400 or above."""
        return int(log_entry.get("status")) >= 400