2018-06-15 21:21:13 +00:00
|
|
|
# encoding: utf-8
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
class GenericParser(object):
    """Group the lines of a log stream into events and iterate over them.

    This base class is not usable on its own. A subclass must provide:

    * ``RE_LOG_ENTRY`` -- a regular expression, matched at the start of each
      line, whose named groups describe one event. It must contain a
      ``timestamp`` group. A line that matches starts a new event; a
      non-blank line that does not match is treated as a continuation
      (e.g. a stack trace) of the current event.
    * ``TIMESTAMP_FORMAT`` -- the ``datetime.strptime`` format of the
      ``timestamp`` group.
    * ``is_serious(log_entry)`` -- only needed when ``errors_only`` is used.
    """

    def __init__(self, fh, errors_only=False):
        """Store the line source and the filtering mode.

        :param fh: any iterable of text lines, e.g. ``sys.stdin`` or a file
            handle returned by ``open`` / ``gzip.open``.
        :param errors_only: when true, only events for which
            ``is_serious`` returns a true value are yielded.
        """
        self.fh = fh
        self.errors_only = errors_only

    def is_serious(self, log_entry):
        """Tell whether *log_entry* should be kept in ``errors_only`` mode.

        Subclasses override this; the base implementation only makes the
        missing override explicit.
        """
        raise NotImplementedError(
            "%s must implement is_serious()" % type(self).__name__)

    def _build_row(self, log_entry, lines, byte_count, line_count, event_count):
        """Assemble the tuple yielded by ``__iter__`` for one finished event."""
        # Everything after the first line break of the event is the
        # continuation text (stack trace); it is "" for one-line events.
        stack_trace = "".join(lines).partition("\n")[2]
        timestamp = datetime.strptime(
            log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return (timestamp, log_entry, stack_trace,
                byte_count, line_count, event_count)

    def __iter__(self):
        """Yield one row per log event.

        Each row is the tuple ``(timestamp, log_entry, stack_trace,
        byte_count, line_count, event_count)`` where

        * ``timestamp`` is the parsed ``datetime`` of the event,
        * ``log_entry`` is the ``groupdict()`` of the matching header line,
        * ``stack_trace`` is the text of the continuation lines,
        * ``byte_count`` / ``line_count`` are the running totals of
          ``len(line)`` and of lines read from ``fh`` when the row is
          emitted. An event is only known to be complete once the next
          header line has been read, so these totals already include that
          line. ``len(line)`` counts characters for text-mode handles.
        * ``event_count`` is the 1-based number of the yielded event.

        Blank lines are counted but otherwise ignored. Non-matching lines
        seen before the first header line are discarded.

        :raises ValueError: if a ``timestamp`` group does not match
            ``TIMESTAMP_FORMAT`` (propagated from ``datetime.strptime``).
        """
        entry_pattern = re.compile(self.RE_LOG_ENTRY)  # hoisted out of the loop
        pending_lines = []   # raw lines of the event currently being collected
        log_entry = None     # parsed header of that event
        byte_count = 0
        line_count = 0
        event_count = 0

        for line in self.fh:
            byte_count += len(line)
            line_count += 1

            if not line.strip():  # skip empty lines
                continue

            m = entry_pattern.match(line)
            if m is None:
                # Continuation of the current event.
                pending_lines.append(line)
                continue

            # A new header line closes the previous event. A filtered-out
            # event is simply not yielded; the parser state below must still
            # advance, otherwise the rejected entry would stay pending and
            # block every later event.
            if log_entry and not (
                    self.errors_only and not self.is_serious(log_entry)):
                event_count += 1
                # The yield makes this method a generator.
                yield self._build_row(
                    log_entry, pending_lines,
                    byte_count, line_count, event_count)

            pending_lines = [line]
            log_entry = m.groupdict()

        # Flush the final event: no further header line will arrive to
        # close it, so it has to be emitted once the input is exhausted.
        if log_entry and not (
                self.errors_only and not self.is_serious(log_entry)):
            event_count += 1
            yield self._build_row(
                log_entry, pending_lines,
                byte_count, line_count, event_count)
|
|
|
|
|
|
|
|
class JavaLogParser(GenericParser):
    """Parser for Java application logs with ``--- [thread] class : message`` lines.

    A header line has the shape
    ``<timestamp> <SEVERITY> 1 --- [<thread>]<class> : <message>``;
    any other non-blank line is treated as a continuation (stack trace)
    of the preceding event.
    """

    # Timestamp with fractional seconds, e.g. "2018-06-15 21:21:13.123".
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"

    # Raw string so that "\[" and "\]" reach the regex engine as written
    # instead of relying on Python passing unknown escapes through.
    # NOTE(review): the literal " 1 " before "---" looks like a fixed
    # process-id column -- confirm against the logs being parsed.
    RE_LOG_ENTRY = r"(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+) +: (?P<message>.+)"

    def is_serious(self, log_entry):
        """Return True when the entry's ``severity`` group is exactly ``ERROR``."""
        return log_entry.get("severity") == "ERROR"
|
|
|
|
|
|
|
|
class ApacheLogParser(GenericParser):
    """Parser for Apache-style access log lines.

    A header line has the shape
    ``<remote_addr> - <username> [<timestamp> +ZZZZ] "<VERB> <path> HTTP/1.x"
    <status> <size> "<referrer>" "<user_agent>"``.
    """

    # Raw strings so that "\[", "\+", "\d" and "\]" reach the regex engine
    # as written instead of relying on Python passing unknown escapes
    # through. The pieces concatenate to a single pattern.
    # NOTE(review): only "+NNNN" UTC offsets and a numeric size are
    # accepted; lines with a "-NNNN" offset or a "-" size will not match
    # and are therefore treated as continuation lines -- confirm this is
    # acceptable for the logs being parsed.
    RE_LOG_ENTRY = (
        r'(?P<remote_addr>.+?) - (?P<username>.+?) '
        r'\[(?P<timestamp>.+?) \+\d\d\d\d\] '
        r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1.[01]" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<user_agent>.+?)"'
    )

    # Timestamp without the UTC offset, e.g. "15/Jun/2018:21:21:13";
    # the offset is matched by the pattern but kept out of the group.
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True when the numeric ``status`` group is 400 or above."""
        return int(log_entry.get("status")) >= 400
|