Apache parsija täielik näidis
This commit is contained in:
parent
3cde07e23a
commit
0649680459
34
raport.py
Normal file
34
raport.py
Normal file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/python
# encoding: utf-8

"""
Parse Apache log entries from standard input and print leaderboards.

Usage: (cat access.log; zcat access.log.1.gz) | python raport.py
"""

import re
import sys
from collections import Counter
from log_parsers import ApacheLogParser

hits = Counter()    # client IP addresses the requests came from
urls = Counter()    # URLs that were requested
agents = Counter()  # user agents that made the requests


def print_top(title, counter, limit=5):
    """Print `title` followed by the `limit` most common keys of `counter`.

    Each entry is written as a right-aligned count and the key.
    """
    print(title)
    for key, count in counter.most_common(limit):
        sys.stdout.write("% 9d %s\n" % (count, key))


# Index the row instead of unpacking a fixed number of fields: the parsed
# log entry dict is the second field both for a three-field row and for the
# six-field row yielded by the ApacheLogParser in raport/log_parsers.py.
for row in ApacheLogParser(sys.stdin):
    log_entry = row[1]
    urls[log_entry.get("path")] += 1
    hits[log_entry.get("remote_addr")] += 1
    agents[log_entry.get("user_agent")] += 1

print_top("Top5 külastatud URL-id veebiserveris:", urls)
print("")
print_top("Top5 külastusi teinud IP aadressid:", hits)
print("")
print_top("Top5 kasutatud veebilehitsejad/OS-id:", agents)
|
||||
|
7
raport/README.md
Normal file
7
raport/README.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Apache logide parsija
|
||||
|
||||
Komplektne näide kuidas Apache2 logisid parsida ning raporteerida huvitavamad killud:
|
||||
|
||||
* Parsib Apache logifaili kirjed ApacheLogParser klassi abil mis on kirjeldatud failis log_parsers.py
|
||||
* Värvib kaardi faili BlankMap-World6.svg ning salvestab top.svg faili sisse
|
||||
* Genereerib Bootstrap baasil koostatud veebilehe mallist template.html faili raport.html
|
49
raport/log_parsers.py
Normal file
49
raport/log_parsers.py
Normal file
@ -0,0 +1,49 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
class GenericParser(object):
    """Iterate over (possibly multi-line) log entries read from a file handle.

    Subclasses are expected to provide:

    * ``RE_LOG_ENTRY`` - regular expression matching the first line of an
      entry; it must define a named group ``timestamp``.
    * ``TIMESTAMP_FORMAT`` - ``datetime.strptime`` format of that timestamp.
    * ``is_serious(log_entry)`` - predicate deciding whether the entry is
      reported at all.

    Lines that do not match ``RE_LOG_ENTRY`` are treated as continuation
    lines (e.g. a stack trace) of the preceding entry.
    """

    def __init__(self, fh):
        # fh may be sys.stdin, gzip.open(...), open(...) or any other
        # object that yields lines when iterated
        self.fh = fh

    def _make_row(self, log_entry, multiline_message, byte_count, line_count, event_count):
        """Build the tuple yielded for one finished log entry."""
        # Everything after the first line of the message is the stack trace
        stack_trace = "\n".join(multiline_message.split("\n")[1:])
        timestamp = datetime.strptime(log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return timestamp, log_entry, stack_trace, byte_count, line_count, event_count

    def __iter__(self):
        """Yield ``(timestamp, log_entry, stack_trace, byte_count, line_count,
        event_count)`` for every entry accepted by ``is_serious``.

        ``byte_count`` and ``line_count`` are running totals of the input
        consumed so far; ``event_count`` is the number of rows yielded so far,
        including the current one.
        """
        match_entry = re.compile(self.RE_LOG_ENTRY).match  # compile once, not per line
        multiline_message = ""
        log_entry = None
        byte_count = 0
        line_count = 0
        event_count = 0

        for line in self.fh:
            byte_count += len(line)
            line_count += 1

            if not line.strip():  # skip empty lines
                continue

            m = match_entry(line)
            if not m:
                # Continuation line of the entry currently being collected
                multiline_message += line
                continue

            # A new entry starts here, so the previous one is complete
            if log_entry and self.is_serious(log_entry):
                event_count += 1
                yield self._make_row(
                    log_entry, multiline_message,
                    byte_count, line_count, event_count)
            multiline_message = line
            log_entry = m.groupdict()

        # The final entry has no successor to trigger its emission, so flush
        # it here; otherwise the last log entry of the input would be lost.
        if log_entry and self.is_serious(log_entry):
            event_count += 1
            yield self._make_row(
                log_entry, multiline_message,
                byte_count, line_count, event_count)
|
||||
|
||||
class JavaLogParser(GenericParser):
    """Parser for log lines shaped like
    ``<timestamp> <SEVERITY> 1 --- [<thread>]<class> : <message>``.

    Only entries whose severity is ``ERROR`` are reported.
    """

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
    # Raw string so that the regex escapes (\[ and \]) are not interpreted as
    # (invalid) Python string escapes; the pattern itself is unchanged.
    # NOTE(review): the literal "1" before "---" looks like a process ID -
    # confirm whether other values need to be accepted.
    RE_LOG_ENTRY = r"(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+) +: (?P<message>.+)"

    def is_serious(self, log_entry):
        """Return True when the entry was logged with severity ERROR."""
        return log_entry.get("severity") == "ERROR"
|
||||
|
||||
class ApacheLogParser(GenericParser):
    """Parser for Apache access log lines of the form
    ``addr - user [timestamp +zzzz] "VERB path HTTP/1.x" status size "referrer" "user agent"``.

    Only requests answered with an HTTP status of 400 or above are reported.
    """

    # Raw string so that the regex escapes are not interpreted as Python
    # string escapes. The timezone offset accepts both signs ("+0200" and
    # "-0500"); it is matched outside the timestamp group, so
    # TIMESTAMP_FORMAT does not need to cover it.
    # NOTE(review): the size group only accepts digits; Apache can log "-"
    # when no bytes were sent, and such lines are currently treated as
    # continuation lines - confirm whether that is intended.
    RE_LOG_ENTRY = (
        r'(?P<remote_addr>.+?) - (?P<username>.+?) '
        r'\[(?P<timestamp>.+?) [+-]\d\d\d\d\] '
        r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1\.[01]" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<user_agent>.+?)"'
    )
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True when the response status is an error (>= 400)."""
        return int(log_entry.get("status")) >= 400
|
129
raport/main.py
Normal file
129
raport/main.py
Normal file
@ -0,0 +1,129 @@
|
||||
#!/usr/bin/python
# encoding: utf-8

"""
Build an HTML report (coloured world map, charts and top lists) from Apache
log entries read from standard input.

To install the dependencies:

    apt install -y python-jinja2 python-lxml python-pygal
    dnf install -y python-jinja2 python-lxml python-pygal

The script also imports GeoIP, requests, numpy and matplotlib, which have to
be installed as well.
"""

import GeoIP
import re
import sys
from collections import Counter
from datetime import datetime
from log_parsers import ApacheLogParser

# Start time; template.html uses it to report how long the run took
then = datetime.now()

hits_per_remote_addr = Counter()  # client IP addresses the requests came from
hits_per_path = Counter()         # URLs that were requested
hits_per_user_agent = Counter()   # user agents that made the requests
hits_per_country = Counter()      # countries the requests came from
hits_per_date = Counter()         # dates on which log entries occurred
bytes_per_date = Counter()        # response bytes per date

gi = GeoIP.open("/usr/share/GeoIP/GeoIP.dat", GeoIP.GEOIP_MEMORY_CACHE)

print("Loen standardsisendist...")
# byte_count, line_count and event_count keep the values of the last row
# after the loop; template.html prints them as input statistics.
for timestamp, log_entry, stack_trace, byte_count, line_count, event_count in ApacheLogParser(sys.stdin):
    country_code = gi.country_code_by_addr(log_entry.get("remote_addr"))
    hits_per_path[log_entry.get("path")] += 1
    hits_per_remote_addr[log_entry.get("remote_addr")] += 1
    hits_per_user_agent[log_entry.get("user_agent")] += 1
    hits_per_country[country_code] += 1
    hits_per_date[timestamp.date()] += 1
    bytes_per_date[timestamp.date()] += int(log_entry.get("size"))

# Without any rows min()/max() below would raise ValueError and the template
# would miss byte_count/line_count/event_count, so stop with a clear message.
if not hits_per_date:
    sys.exit("Standardsisendist ei leitud sobivaid logikirjeid")

# Find the very first and the very last date on which events occurred
first_date, last_date = min(hits_per_date.keys()), max(hits_per_date.keys())

########################################
### Colour the countries on the map  ###
########################################

import requests
from lxml import etree
from numpy import interp

# Download the map from Wikimedia
print("Laadin alla kaarti...")
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/0/03/BlankMap-World6.svg")
# Fail here on an HTTP error instead of feeding an error page to the XML parser
response.raise_for_status()
buf = response.content

# Parse the XML tree
map_document = etree.fromstring(buf)

# The busiest country maps to hue 0, a country without hits would map to 180
max_hits = max(hits_per_country.values())
for country, count in hits_per_country.items():
    if not country:
        # Some IPs may not have resolved to a country code (internal network etc.)
        continue
    hue = interp(count, [0, max_hits], [180, 0])
    for element in map_document.xpath("//*[@id='%s']" % country.lower()):
        element.set("style", "fill:hsl(%.2f, 60%%, 60%%)" % hue)
        for subelement in element:
            # Drop the class attribute of the child elements
            # (presumably so that class-based styling does not override the fill - confirm)
            subelement.attrib.pop("class", "")

with open("top.svg", "wb") as fh:
    fh.write(etree.tostring(map_document))
print("Kaart salvestatud faili top.svg")

#########################################################
### Plot the number of requests and traffic per date  ###
#########################################################

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(10, 7))

# Add two charts to the figure
sub = fig.add_subplot(2, 1, 1)
sub2 = fig.add_subplot(2, 1, 2)

sub.set_xlabel(u"Päringute arv")
sub2.set_xlabel(u"Liikluse maht baitides")

# Plot the data points
sub.barh(hits_per_date.keys(), hits_per_date.values())
sub2.barh(bytes_per_date.keys(), bytes_per_date.values())

# Save to files
fig.savefig("bar.svg", format="svg")
fig.savefig("bar.png")

####################################
### Build the user agent chart   ###
####################################

import pygal
line_chart = pygal.Pie(
    truncate_legend=50,                                 # legend entries up to 50 characters
    width=1000, height=300,                             # chart width/height in pixels
    style=pygal.style.Style(background='transparent'))  # remove the background colour
line_chart.config(style_name='LightStyle', fill=None)
line_chart.title = 'Veebilehitsejate osakaal'
for user_agent, count in hits_per_user_agent.most_common(7):
    line_chart.add(user_agent, count)    # add to the chart
    hits_per_user_agent.pop(user_agent)  # remove from the counter so only the rest remains
line_chart.add("Muud veebilehitsejad", sum(hits_per_user_agent.values()))

user_agent_chart = line_chart.render(is_unicode=True, disable_xml_declaration=True)

from jinja2 import Template
import codecs

# Read the Jinja template from a UTF-8 text file
with codecs.open("template.html", "rb", encoding="utf-8") as fh:
    template = Template(fh.read())

# Inject all module-level variables into the template
# (first_date, last_date, hits_per_path, user_agent_chart etc.)
buf = template.render(locals())

# Save the rendered page in UTF-8 encoding
with codecs.open("raport.html", "wb", encoding="utf-8") as fh:
    fh.write(buf)

print("HTML kujul raport salvestatud faili raport.html")
|
||||
|
||||
|
103
raport/template.html
Normal file
103
raport/template.html
Normal file
@ -0,0 +1,103 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Apache logide raport</title>
|
||||
<meta charset="utf-8"/>
|
||||
<link href="https://getbootstrap.com/docs/4.1/dist/css/bootstrap.min.css" rel="stylesheet"/>
|
||||
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" crossorigin="anonymous"></script>
|
||||
<script src="https://getbootstrap.com/docs/4.1/dist/js/bootstrap.min.js"></script>
|
||||
<script type="text/javascript" src="http://kozea.github.com/pygal.js/latest/pygal-tooltips.min.js"></script>
|
||||
<style>
|
||||
svg, img {
|
||||
max-width: 80%;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<!-- Navigeerimise lingid, lehe sees -->
|
||||
<nav class="navbar navbar-expand-lg navbar-light bg-light fixed-top">
|
||||
<a class="navbar-brand" href="#">Apache2 logid</a>
|
||||
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item"><a class="nav-link" href="#home">Üles</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="#per_date">Liiklus</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="#user_agents">Veebilehitsejad</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="#urls">URL-id</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="#remote_addrs">IP-aadressid</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="#countries">Riigid</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div class="container">
|
||||
<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
<div class="row mt-3" id="home">
|
||||
<div class="col-md-12">
|
||||
<h2>Sisendandmed</h2>
|
||||
<p>
|
||||
Läbi näritud {{ byte_count | filesizeformat }} andmeid,
|
||||
{{ line_count }} rida, {{ event_count }} logikirjet.
|
||||
Logikirjed
|
||||
{% if first_date == last_date %}
|
||||
päeval {{ first_date }}
|
||||
{% else %}
|
||||
{{ first_date }} kuni {{ last_date }}
|
||||
{% endif %}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mt-3" id="per_date">
|
||||
<div class="col-md-12">
|
||||
<h2>Kuupäevad</h2>
|
||||
<p>Päringute arv ning päringute maht kuupäevade kaupa grupeeritult,
|
||||
ploteeritud <a href="https://matplotlib.org/" target="_blank">matplotlib</a> abil</p>
|
||||
<img src="bar.svg"/>
|
||||
<h2 id="user_agents">Top 7 veebilehitsejad</h2>
|
||||
<p>Interaktiivne graafik ploteeritud <a href="http://pygal.org/en/stable/" target="_blank">pygal</a> abil</p>
|
||||
<div>
|
||||
{{ user_agent_chart }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mt-3" id="urls">
|
||||
<div class="col-md-12">
|
||||
<h2>Top 10 URL-id</h2>
|
||||
<div>
|
||||
<ol>
|
||||
{% for path, count in hits_per_path.most_common(10) %}
|
||||
<li><a href="http://enos.itcollege.ee{{ path | e }}" target="_blank">{{ path | e }}</a>: {{ count }}</li>
|
||||
{% endfor %}
|
||||
</ol>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mt-3" id="remote_addrs">
|
||||
<div class="col-md-12">
|
||||
<h2>Top10 IP aadressid</h2>
|
||||
<div>
|
||||
<ol>
|
||||
{% for remote_addr, count in hits_per_remote_addr.most_common(10) %}
|
||||
<li><a href="http://geoiplookup.net/ip/{{ remote_addr }}" target="_blank">{{ remote_addr }}</a>: {{ count }}</li>
|
||||
{% endfor %}
|
||||
</ol>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mt-3" id="countries">
|
||||
<div class="col-md-12">
|
||||
<h2>Külastused riikide kaupa</h2>
|
||||
<p>Loetud XML failist, värvitud lxml mooduli abil ning salvestatud top.svg faili sisse:</p>
|
||||
<div>
|
||||
<img src="top.svg"/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row mt-3" id="footer">
|
||||
<div class="col-md-12">
|
||||
<p>Fail genereeriti {{ datetime.now() }}, võttis aega {{ datetime.now() - then }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user