Lisatud bin kaust näidistega

This commit is contained in:
2018-06-17 23:14:16 +03:00
parent ae9c681c6d
commit 4c1b365b06
10 changed files with 409 additions and 33 deletions

View File

@@ -1,7 +1,42 @@
# Apache logide parsija
Komplektne näide kuidas Apache2 logisid parsida ning raporteerida huvitavamad killud:
Failis ```main.py``` on näide kuidas Apache2 logisid parsida ning raporteerida huvitavamad killud:
* Parsib Apache logifaili kirjed ApacheLogParser klassi abil mis on kirjeldatud failis log_parsers.py
* Värvib kaardi faili BlankMap-World6.svg ning salvestab top.svg faili sisse
* Laadib ```requests``` mooduli abil alla BlankMap-World6.svg, värvib selle ära ```lxml``` mooduli abil ning salvestab top.svg faili sisse
* Genereerib Bootstrap baasil koostatud veebilehe mallist template.html faili raport.html
Sõltuvuste paigaldamiseks:
```bash
apt install -y python-jinja2 python-lxml python-pygal python-geoip python-numpy python-matplotlib
dnf install -y python-jinja2 python-lxml python-pygal python2-GeoIP python2-numpy python2-matplotlib
```
Käivitamiseks:
```bash
(cat /path/to/access.log; zcat /path/to/access.log.1.gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.[12].gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.[1-5].gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.*.gz) | python main.py
```
# Java rakenduste stack trace'de kokku korjaja
Failis ```main2.py``` on näide kuidas Java rakenduse logikirjetest stack trace'd kokku koguda:
* Ühisosa Apache logide parsijaga on ```GenericParser``` klass milles sisaldub üldine logide parsimise loogika
Sõltuvuste paigaldamiseks:
```bash
apt install -y python-jinja2
dnf install -y python-jinja2
```
Käivitamiseks:
```bash
cat blah.log | python main2.py > raport2.html
```

View File

@@ -15,37 +15,53 @@ class GenericParser(object):
line_count = 0
event_count = 0
for line in self.fh:
byte_count += len(line) # loenda baite
# Loenda logikirjete arv ja maht
byte_count += len(line)
line_count += 1
if not line.strip(): # jäta vahele tühjad read
# Normaliseeri reavahetused
line = line.replace("\r\n", "\n")
# Jäta vahele tühjad read
if not line.strip():
continue
# Püüa regulaaravaldise järgi rida tükkideks võtta
m = re.match(self.RE_LOG_ENTRY, line)
# Kui rida klappis regexiga
if m:
# Väljasta eelmine kokku kleebitud logikirje
if log_entry:
if self.errors_only and not self.is_serious(log_entry):
continue
stack_trace = "\n".join(multiline_message.split("\n")[1:])
event_count += 1
row = \
datetime.strptime(log_entry.get("timestamp"), self.TIMESTAMP_FORMAT), \
log_entry, stack_trace, byte_count, line_count, event_count
# See teeb funktsioonist generaatori/iteraatori
yield row
multiline_message = line
# Kui vaja filtreerida, välista logikirjed mis ei ole veateatega seotud
if not self.errors_only or self.is_serious(log_entry):
event_count += 1
# yield teeb funktsioonist generaatori/iteraatori
# https://pythontips.com/2013/09/29/the-python-yield-keyword-explained/
yield datetime.strptime(log_entry.get("timestamp"), self.TIMESTAMP_FORMAT), \
log_entry, multiline_message, byte_count, line_count, event_count
# Alusta järgmise sõnumi kokku kleepimist
multiline_message = m.group("message")
log_entry = m.groupdict()
else:
elif line.startswith("\t") or line.startswith("Caused by") or line.startswith("org."):
multiline_message += line
else:
sys.stderr.write("Ei suutnud parsida rida:" + line)
class JavaLogParser(GenericParser):
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
RE_LOG_ENTRY = "(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+) +: (?P<message>.+)"
RE_LOG_ENTRY = "(?P<timestamp>.+?) +(?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+?) *: (?P<message>.+)"
def is_serious(self, log_entry):
return log_entry.get("severity") == "ERROR"
class ApacheLogParser(GenericParser):
RE_LOG_ENTRY = "(?P<remote_addr>.+?) - (?P<username>.+?) \[(?P<timestamp>.+?) \+\d\d\d\d\] \"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1.[01]\" (?P<status>\d+) (?P<size>\d+) \"(?P<referrer>.+?)\" \"(?P<user_agent>.+?)\""
RE_LOG_ENTRY = "(?P<remote_addr>.+?) - (?P<username>.+?) \[(?P<timestamp>.+?) \+\d\d\d\d\] \"(?P<message>(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1.[01])\" (?P<status>\d+) (?P<size>\d+) \"(?P<referrer>.+?)\" \"(?P<user_agent>.+?)\""
TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"
def is_serious(self, log_entry):

View File

@@ -1,21 +1,6 @@
#!/usr/bin/python
# encoding: utf-8
"""
Sõltuvuste paigladamiseks:
apt install -y python-jinja2 python-lxml python-pygal python-geoip python-numpy python-matplotlib
dnf install -y python-jinja2 python-lxml python-pygal python2-GeoIP python2-numpy python2-matplotlib
Käivitamiseks
(cat /path/to/access.log; zcat /path/to/access.log.1.gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.[12].gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.[1-5].gz) | python main.py
(cat /path/to/access.log; zcat /path/to/access.log.*.gz) | python main.py
"""
import GeoIP
import re
import sys
@@ -42,7 +27,8 @@ for timestamp, log_entry, stack_trace, byte_count, line_count, event_count in Ap
if int(log_entry.get("status")) < 400: # 2xx ja 3xx arvestamiseks
hits_per_path[log_entry.get("path")] += 1
hits_per_remote_addr[log_entry.get("remote_addr")] += 1
hits_per_user_agent[log_entry.get("user_agent")] += 1
if "bot" not in log_entry.get("user_agent").lower():
hits_per_user_agent[log_entry.get("user_agent")] += 1
hits_per_country[country_code] += 1
hits_per_date[timestamp.date()] += 1
bytes_per_date[timestamp.date()] += int(log_entry.get("size"))
@@ -64,13 +50,26 @@ buf = requests.get("https://upload.wikimedia.org/wikipedia/commons/0/03/BlankMap
# Parsi XML puu
map_document = etree.fromstring(buf)
# Iga riigikoodi ja sellest riigist pärit päringute arvu kohta
for country, count in hits_per_country.items():
if not country:
# Mõni IP ei pruukinud laheneda riigikoodiks (sisevõrk jms)
continue
# Interpoleeri päringute arv vahemikust 0 ... maksimaalsete päringutega riik
# vahemikku 180 (sinakas toon) ... 0 (punane)
# võrdväärne rida: hue = 180 - 180 * count / max(hits_per_country.values())
hue = interp(count, [0, max(hits_per_country.values())], [180, 0])
# Nopi rekursiivselt dokumendist välja kõik elemendid mille 'id' attribuut on riigikoodiga
# Kaardis vastab sellele küll ainult üks <g> ehk grupi element
for element in map_document.xpath("//*[@id='%s']" % country.lower()):
# Lisa CSS-i stiili attribuut <g> elemendile taustavärvi muutmiseks
element.set("style", "fill:hsl(%.2f, 60%%, 60%%)" % hue)
# Polügonid mis on grupi sees, neilt eemalda attribuut 'class' mille
# abil pannakse vaikimisi hall värv külge
for subelement in element:
subelement.attrib.pop("class", "")

51
raport/main2.py Normal file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/python
# encoding: utf-8
"""Collect Java stack traces from a log on stdin and print an HTML report.

Every log entry that JavaLogParser yields with ``errors_only=True`` is
counted by its message/stack-trace text, and the counts are rendered into a
Bootstrap page written to stdout.

Usage:
    cat blah.log | python main2.py > raport2.html
"""
import codecs
import re
import sys
from collections import Counter

from jinja2 import Template

from log_parsers import JavaLogParser

# The former ``import GeoIP`` was dropped: nothing in this script uses it and
# it made the script fail on systems where only jinja2 is installed.

# NOTE(review): the <title> still says "Apache logide raport"; it looks copied
# from the Apache report -- confirm whether it should be renamed.
HTML_TEMPLATE = u"""<!DOCTYPE html>
<html>
  <head>
    <title>Apache logide raport</title>
    <meta charset="utf-8"/>
    <link href="https://getbootstrap.com/docs/4.1/dist/css/bootstrap.min.css" rel="stylesheet"/>
    <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" crossorigin="anonymous"></script>
    <script src="https://getbootstrap.com/docs/4.1/dist/js/bootstrap.min.js"></script>
  </head>
  <body>
    <div class="container">
      {% for stack_trace, count in errors.items() %}
      <div class="row mt-3">
        <div class="col-md-12">
          {{ count }} korda esinenud viga
          <button class="btn btn-primary" type="button" data-toggle="collapse"
            data-target="#stack-{{ loop.index }}">Stack trace</button>
        </div>
      </div>
      <div class="row">
        <div class="col-md-12">
          <div id="stack-{{ loop.index }}" class="collapse">
            <pre>{{ stack_trace }}</pre>
          </div>
        </div>
      </div>
      {% endfor %}
    </div>
  </body>
</html>
"""

sys.stderr.write("Loen standardsisendist...\n")

# Count how many times each distinct stack trace text occurs.
errors = Counter()
for timestamp, log_entry, stack_trace, byte_count, line_count, event_count in JavaLogParser(sys.stdin, errors_only=True):
    errors[stack_trace] += 1

# Autoescape because the stack traces come straight from the log: without it
# frames such as "<init>" would be parsed as HTML tags and arbitrary log
# content could inject markup into the report.
template = Template(HTML_TEMPLATE, autoescape=True)

# Pass only what the template reads instead of every module-level name.
# The parenthesised form prints the same text on Python 2 and Python 3.
print(template.render(errors=errors))