logide-parsimine/bin/filter2

62 lines
2.4 KiB
Python
Executable File

#!/usr/bin/python
# encoding: utf-8
import os
import re
import gzip
import sys
from datetime import datetime, timedelta
# Optional positional command-line arguments:
#   1. directory to scan for log files      (default: /var/log)
#   2. start of the time window, formatted
#      as "YYYY-MM-DD HH:MM:SS"             (default: today at 00:00:00)
#   3. length of the window in minutes      (default: 5)
dir_logs = sys.argv[1] if len(sys.argv) > 1 else "/var/log"

if len(sys.argv) > 2:
    scope_begin = datetime.strptime(sys.argv[2], "%Y-%m-%d %H:%M:%S")
else:
    # No start given: begin at midnight of the current day.
    scope_begin = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

duration = int(sys.argv[3]) if len(sys.argv) > 3 else 5

# The window ends `duration` minutes after it begins.
scope_end = scope_begin + timedelta(minutes=duration)

# Progress/diagnostic output goes to stderr so stdout carries only log lines.
sys.stderr.write(
    "Otsin logikirjeid vahemikus %s kuni %s kaustast %s\n"
    % (scope_begin, scope_end, dir_logs))
# Regular expression for one access-log line in the "combined" layout:
#   remote_addr - username [timestamp] "VERB path HTTP/x.y" status size "referrer" "agent"
# Each field is exposed as a named group.
#
# Written as raw strings so that regex escapes such as \[ and \d are not
# interpreted (and warned about) as string escapes.
#  * The dot in the protocol version is escaped, so it only matches a literal
#    ".", and any single-digit major/minor version is accepted (HTTP/2.0
#    requests would otherwise never match).
#  * The size field also accepts "-", which some servers (e.g. Apache's %b)
#    write instead of a byte count when no response body was sent.
RE_LOG_ENTRY = (
    r'(?P<remote_addr>.+?) - (?P<username>.+?) \[(?P<timestamp>.+?)\] '
    r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/\d\.\d" '
    r'(?P<status>\d+) (?P<size>\d+|-) '
    r'"(?P<referrer>.+?)" "(?P<agent>.+?)"'
)
# Walk the log directory and print (to stdout) every access-log entry whose
# timestamp lies inside the closed window [scope_begin, scope_end].
# Files that cannot overlap the window are skipped, judged by their
# modification time and by the timestamp of their first parsable entry.

# Compile the pattern once instead of looking it up for every line.
# re.compile() accepts both a pattern string and an already compiled pattern.
match_entry = re.compile(RE_LOG_ENTRY).match

# os.walk traverses the directory tree recursively
for root, dirs, files in os.walk(dir_logs):
    # for every directory (root) we get the list of its subdirectories (dirs)
    # and the list of its files (files)
    for filename in files:
        if not filename.startswith("access.log"):
            continue
        path = os.path.join(root, filename)

        # The modification time is used as the time of the file's last entry.
        file_end = datetime.fromtimestamp(os.stat(path).st_mtime)
        if scope_begin > file_end:
            sys.stderr.write("Jätan vahele %s faili kuna logifaili lõpp oli %s\n" %
                (path, file_end))
            continue

        file_start = None
        # Both kinds of file are opened in binary mode so that undecodable
        # bytes in a log line cannot abort the scan while reading.
        opener = gzip.open if path.endswith(".gz") else open
        with opener(path, "rb") as fh:
            for line in fh:
                if not isinstance(line, str):
                    # Python 3 yields bytes here; the pattern is a str pattern,
                    # so decode first. On Python 2 the line already is a str
                    # and is passed through untouched.
                    line = line.decode("utf-8", "replace")
                m = match_entry(line)
                if not m:
                    # Not a recognisable access-log entry; ignore it.
                    continue
                # Drop the trailing 6-character UTC offset (e.g. " +0300")
                # before parsing; timestamps are compared as naive datetimes.
                dt = datetime.strptime(m.group("timestamp")[:-6], "%d/%b/%Y:%H:%M:%S")
                if file_start is None:  # first parsable entry of this file
                    file_start = dt
                    if scope_end < file_start:
                        sys.stderr.write("Jätan vahele %s faili kuna logifaili algus oli %s\n" % (path, file_start))
                        # Leave the line loop and move on to the next file
                        break
                    sys.stderr.write("Otsin logikirjeid failist %s\n" % path)
                if dt > scope_end:
                    # Stop at the first entry past the window; this relies on
                    # entries being written in chronological order.
                    break
                # The lower bound is inclusive, like the upper bound and the
                # file-level checks above, so an entry stamped exactly at
                # scope_begin is not lost.
                if dt >= scope_begin:
                    # sys.stdout.write is valid on Python 2 and 3 alike and
                    # emits the same text as the former print statement.
                    sys.stdout.write(line.strip() + "\n")