#!/usr/bin/python
# encoding: utf-8
import os
import re
import gzip
import sys
from datetime import datetime, timedelta

# Positional command-line arguments, every one of them optional:
#   1. directory to scan for log files     (default: /var/log)
#   2. start of the time window            (default: today at 00:00:00)
#   3. length of the window in minutes     (default: 5)
if len(sys.argv) > 1:
    dir_logs = sys.argv[1]
else:
    dir_logs = "/var/log"

if len(sys.argv) > 2:
    try:
        scope_begin = datetime.strptime(sys.argv[2], "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Could not parse the time as YYYY-mm-dd HH:MM:SS, so try the
        # dd/mm/YYYY HH:MM:SS form as well; if that fails too, the
        # ValueError propagates and ends the script.
        scope_begin = datetime.strptime(sys.argv[2], "%d/%m/%Y %H:%M:%S")
else:
    scope_begin = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

duration = int(sys.argv[3]) if len(sys.argv) > 3 else 5

# The search window spans `duration` minutes starting at scope_begin.
scope_end = scope_begin + timedelta(minutes=duration)
sys.stderr.write(
    "Otsin logikirjeid vahemikus %s kuni %s kaustast %s\n"
    % (scope_begin, scope_end, dir_logs))

# Pattern for one access-log line shaped like:
#   remote_addr - username [timestamp] "VERB path HTTP/1.x" status size "referrer" "agent"
# It is a raw string so that regex escapes such as \[ and \d reach the regex
# engine untouched instead of being treated as (invalid) string escapes.
# The dot in the HTTP version is escaped so it only matches a literal ".".
# The size field also accepts "-", which Apache's %b writes when no body
# bytes were sent; such lines would otherwise be silently ignored.
RE_LOG_ENTRY = r'(?P<remote_addr>.+?) - (?P<username>.+?) \[(?P<timestamp>.+?)\] "(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1\.[01]" (?P<status>\d+) (?P<size>\d+|-) "(?P<referrer>.+?)" "(?P<agent>.+?)"'

# Compile the pattern once up front rather than handing the pattern to
# re.match() again for every single log line.
log_entry_re = re.compile(RE_LOG_ENTRY)

# os.walk traverses the directory tree recursively; for every directory
# (root) it yields the list of subdirectories (dirs) and of files (files).
for root, dirs, files in os.walk(dir_logs):
    for filename in files:
        # Only files whose name starts with "access.log" are considered.
        if not filename.startswith("access.log"):
            continue
        path = os.path.join(root, filename)

        # The modification time is used as the time of the file's last entry:
        # a file last written before the window opens cannot contain matches.
        file_end = datetime.fromtimestamp(os.stat(path).st_mtime)
        if scope_begin > file_end:
            sys.stderr.write("Jätan vahele %s faili kuna logifaili lõpp oli %s\n" %
                (path, file_end))
            continue

        file_start = None
        # Files ending in .gz are read through gzip, everything else as-is.
        with gzip.open(path) if path.endswith(".gz") else open(path) as fh:
            for line in fh:
                m = log_entry_re.match(line)
                if not m:
                    # Lines that do not look like a log entry are ignored.
                    continue
                # Cut off the last 6 characters before parsing; this assumes
                # the timestamp ends in a " +HHMM"-style UTC offset.
                dt = datetime.strptime(m.group("timestamp")[:-6], "%d/%b/%Y:%H:%M:%S")
                if file_start is None:
                    # First parsable entry of this file.
                    file_start = dt
                    if scope_end < file_start:
                        sys.stderr.write("Jätan vahele %s faili kuna logifaili algus oli %s\n" % (path, file_start))
                        # Leave the line loop and move on to the next file.
                        break
                    sys.stderr.write("Otsin logikirjeid failist %s\n" % path)
                if dt > scope_end:
                    # Past the end of the window; the rest of the file is
                    # not read (entries are expected in chronological order).
                    break
                # The window is closed on both ends, matching the file-level
                # skip checks above: an entry stamped exactly at scope_begin
                # belongs to the result.
                if dt >= scope_begin:
                    # Parenthesised form behaves the same as the Python 2
                    # print statement and is also valid Python 3 syntax.
                    print(line.strip())