52 lines
1.7 KiB
Python
Executable File
52 lines
1.7 KiB
Python
Executable File
#!/usr/bin/python
|
|
# encoding: utf-8
|
|
|
|
import argparse
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
import sys
|
|
from collections import Counter
|
|
|
|
# Tallies filled while reading the log; each maps a value to its request count.
hits = Counter()    # client IP addresses the requests came from
urls = Counter()    # request paths (URLs) that were visited
agents = Counter()  # User-Agent strings used for the visits

# Pattern for one access-log line; it appears to target the "combined" log
# layout: addr - user [timestamp zone] "VERB path HTTP/x.y" status size
# "referrer" "agent".
#
# Written as raw strings so that regex escapes such as \[ and \d are not
# interpreted (and warned about) as string escapes.  Compared with a strict
# HTTP/1.x-only pattern it also accepts:
#   * negative timezone offsets (e.g. -0500), not just +hhmm;
#   * any single-digit major.minor protocol version (e.g. HTTP/2.0);
#   * "-" as the response size, which Apache's %b writes when no body is sent.
RE_LOG_ENTRY = (
    r'(?P<remote_addr>.+?) - (?P<username>.+?) '
    r'\[(?P<timestamp>.+?) [+-]\d\d\d\d\] '
    r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/\d\.\d" '
    r'(?P<status>\d+) (?P<size>\d+|-) '
    r'"(?P<referrer>.+?)" "(?P<agent>.+?)"'
)
|
|
|
|
# User-agent fragments that mark crawlers and search engines.  Compiled once
# up front because the pattern is the same for every log line.
_RE_BOT_AGENT = re.compile("(crawler|spider|Nuhk|Googlebot|yahoo|yandex)")

# Address prefixes treated as local traffic: 127.* and 192.168.*.
_LOCAL_PREFIXES = ("127.", "192.168.")

# Read log lines from stdin and tally every request that survives the filters
# below into the hits / urls / agents counters.
for line in sys.stdin:
    entry = re.match(RE_LOG_ENTRY, line)
    if not entry:
        # Report the unparsable line on stderr and carry on with the next one.
        sys.stderr.write("Ei suutnud parsida rida: %s" % line)
        continue

    remote_addr = entry.group("remote_addr")
    agent = entry.group("agent")

    # Ignore requests originating from localhost and the internal network
    # (e.g. monitoring such as nagios).
    if remote_addr.startswith(_LOCAL_PREFIXES):
        continue

    # Only GET and POST are counted; every other verb (OPTIONS etc.) is skipped.
    if entry.group("verb") not in ("GET", "POST"):
        continue

    # Skip bots and search engines.
    if _RE_BOT_AGENT.search(agent):
        continue

    # The timestamp group is captured by the pattern but is not needed for
    # any of the tallies, so it is deliberately not parsed here.
    hits[remote_addr] += 1
    urls[entry.group("path")] += 1
    agents[agent] += 1
|
|
|
|
|
|
def _write_top(title, counter, limit=5):
    """Write *title* and the *limit* most common entries of *counter* to stdout.

    Each entry is written as one row: the count right-aligned in a
    9-character field, a space, then the counted value.
    """
    sys.stdout.write(title + "\n")
    for value, count in counter.most_common(limit):
        sys.stdout.write("% 9d %s\n" % (count, value))


# Emit the three "top 5" sections, separated by an empty line.
# sys.stdout.write is used rather than the print statement so that this code
# is valid syntax under both Python 2 and Python 3.
_write_top("Top 5 enim külastatud URL-i veebiserveris:", urls)
sys.stdout.write("\n")
_write_top("Top 5 enim külastusi teinud IP aadressid:", hits)
sys.stdout.write("\n")
_write_top("Top 5 enim kasutatud veebilehitsejad/OS-id:", agents)
|
|
|