# encoding: utf-8
"""Streaming parsers that group raw log lines into (multi-line) log events."""
import re
import sys
from datetime import datetime


class GenericParser(object):
    """Iterate over a line-oriented log source and yield one tuple per log event.

    Subclasses must provide:

    * ``RE_LOG_ENTRY`` -- regular expression matched against the start of every
      line; it must define at least the named groups ``timestamp`` and
      ``message``.
    * ``TIMESTAMP_FORMAT`` -- ``datetime.strptime`` format of the ``timestamp``
      group.
    * ``is_serious(log_entry)`` -- predicate used when ``errors_only`` is set.

    Lines that do not match ``RE_LOG_ENTRY`` but start with one of
    ``CONTINUATION_PREFIXES`` are appended to the message of the entry that is
    currently being assembled. Any other non-blank line is reported on stderr
    and dropped.
    """

    # Prefixes that mark a line as the continuation of the previous entry.
    CONTINUATION_PREFIXES = ("\t", "Caused by", "org.")

    def __init__(self, fh, errors_only=False):
        """Store the input source and the filtering flag.

        Args:
            fh: Any iterable that yields text lines, e.g. ``sys.stdin`` or a
                handle returned by ``open`` / ``gzip.open``.
            errors_only: When true, only entries for which ``is_serious``
                returns a true value are yielded.
        """
        self.fh = fh
        self.errors_only = errors_only

    def is_serious(self, log_entry):
        """Return whether *log_entry* (a ``groupdict``) is error-related.

        Raises:
            NotImplementedError: always; concrete parsers override this.
        """
        raise NotImplementedError("subclasses must implement is_serious()")

    def _is_wanted(self, log_entry):
        """Return True when *log_entry* passes the ``errors_only`` filter."""
        return not self.errors_only or self.is_serious(log_entry)

    def _make_event(self, log_entry, message, byte_count, line_count, event_count):
        """Build the tuple yielded for one finished log entry."""
        timestamp = datetime.strptime(
            log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return (timestamp, log_entry, message,
                byte_count, line_count, event_count)

    def __iter__(self):
        """Yield ``(timestamp, log_entry, message, byte_count, line_count, event_count)``.

        * ``timestamp`` -- ``datetime`` parsed from the ``timestamp`` group.
        * ``log_entry`` -- ``groupdict()`` of the line that started the entry.
        * ``message`` -- the ``message`` group followed by the raw text of all
          continuation lines. The first line's message carries no trailing
          newline, so the first continuation line follows it directly.
        * ``byte_count`` / ``line_count`` -- running totals of ``len(line)`` and
          of lines read so far, including the line that triggered the yield.
          NOTE(review): for text-mode handles ``len(line)`` counts characters,
          not bytes.
        * ``event_count`` -- number of events yielded so far, this one included.

        An entry is emitted once the next entry starts or the input ends.
        """
        # Compile once instead of looking the pattern up for every line.
        pattern = re.compile(self.RE_LOG_ENTRY)

        multiline_message = ""
        log_entry = None
        byte_count = 0
        line_count = 0
        event_count = 0

        for line in self.fh:
            # Track the amount of input consumed (before normalisation).
            byte_count += len(line)
            line_count += 1

            # Normalise Windows line endings.
            line = line.replace("\r\n", "\n")

            # Skip blank lines.
            if not line.strip():
                continue

            m = pattern.match(line)
            if m:
                # A new entry begins: emit the one assembled so far, unless it
                # is filtered out by errors_only.
                if log_entry is not None and self._is_wanted(log_entry):
                    event_count += 1
                    yield self._make_event(
                        log_entry, multiline_message,
                        byte_count, line_count, event_count)

                # Start assembling the next message.
                multiline_message = m.group("message")
                log_entry = m.groupdict()
            elif line.startswith(self.CONTINUATION_PREFIXES):
                multiline_message += line
            else:
                sys.stderr.write("Ei suutnud parsida rida:" + line)

        # The final entry has no successor line to trigger its emission, so it
        # must be flushed here or it would be lost.
        if log_entry is not None and self._is_wanted(log_entry):
            event_count += 1
            yield self._make_event(
                log_entry, multiline_message,
                byte_count, line_count, event_count)


class JavaLogParser(GenericParser):
    """Parser for Java application logs with ``ERROR``-style severity levels."""

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
    # "timestamp", "severity" and "message" are the keys this module reads.
    # NOTE(review): "thread" and "logger" are reconstructed group names --
    # confirm against consumers of log_entry.
    # NOTE(review): the literal " 1 " looks like a fixed process-id column --
    # confirm before generalising it.
    RE_LOG_ENTRY = (
        r"(?P<timestamp>.+?) +(?P<severity>[A-Z]+) 1 --- "
        r"\[(?P<thread>.+)\](?P<logger>.+?) *: (?P<message>.+)"
    )

    def is_serious(self, log_entry):
        """Return True when the entry's severity is exactly ``ERROR``."""
        return log_entry.get("severity") == "ERROR"


class ApacheLogParser(GenericParser):
    """Parser for Apache access-log lines with referrer and user-agent fields."""

    # "timestamp", "message" and "status" are the keys this module reads.
    # NOTE(review): "host", "user", "method", "path", "size", "referrer" and
    # "agent" are reconstructed group names -- confirm against consumers.
    # The timezone offset is matched (either sign) but not captured, so the
    # parsed timestamp is naive.
    RE_LOG_ENTRY = (
        r'(?P<host>.+?) - (?P<user>.+?) \[(?P<timestamp>.+?) [+-]\d{4}\] '
        r'"(?P<message>(?P<method>[A-Z]+) (?P<path>.+) HTTP/1\.[01])" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<agent>.+?)"'
    )
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True for HTTP status codes of 400 and above."""
        return int(log_entry.get("status")) >= 400