Apache parsija täielik näidis
This commit is contained in:
		
							
								
								
									
										34
									
								
								raport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								raport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | |||||||
|  | #!/usr/bin/python | ||||||
|  | # encoding: utf-8 | ||||||
|  |  | ||||||
|  | """ | ||||||
|  | Parsi standardsisendist Apache logikirjed ja kuva edetabelid | ||||||
|  | Käivitamiseks: (cat access.log; zcat access.log.1.gz) | python raport.py | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import re | ||||||
|  | import sys | ||||||
|  | from collections import Counter | ||||||
|  | from log_parsers import ApacheLogParser | ||||||
|  |  | ||||||
hits = Counter()  # client IP addresses the requests came from
urls = Counter()  # request paths that were visited
agents = Counter()  # user agent strings sent with the requests

# Every row starts with (timestamp, log_entry, stack_trace). The parser in
# raport/log_parsers.py appends running counters after those three fields, so
# pick the entry by position instead of unpacking a fixed number of fields
# (a three-name unpack raises ValueError on the longer rows).
for row in ApacheLogParser(sys.stdin):
    log_entry = row[1]
    urls[log_entry.get("path")] += 1
    hits[log_entry.get("remote_addr")] += 1
    agents[log_entry.get("user_agent")] += 1

# One (title, counter) pair per leaderboard, printed in this order
sections = (
    ("Top5 külastatud URL-id veebiserveris:", urls),
    ("Top5 külastusi teinud IP aadressid:", hits),
    ("Top5 kasutatud veebilehitsejad/OS-id:", agents),
)

for index, (title, counter) in enumerate(sections):
    if index:
        # Blank line between leaderboards, none before the first one
        sys.stdout.write("\n")
    sys.stdout.write(title + "\n")
    for value, count in counter.most_common(5):
        sys.stdout.write("% 9d %s\n" % (count, value))
|  |  | ||||||
							
								
								
									
										7
									
								
								raport/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								raport/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | |||||||
|  | # Apache logide parsija | ||||||
|  |  | ||||||
|  | Komplektne näide, kuidas Apache2 logisid parsida ning raporteerida huvitavamad killud: | ||||||
|  |  | ||||||
|  | * Parsib Apache logifaili kirjed ApacheLogParser klassi abil, mis on kirjeldatud failis log_parsers.py | ||||||
|  | * Värvib kaardi faili BlankMap-World6.svg ning salvestab top.svg faili sisse | ||||||
|  | * Genereerib Bootstrap baasil koostatud veebilehe mallist template.html faili raport.html | ||||||
							
								
								
									
										49
									
								
								raport/log_parsers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								raport/log_parsers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | |||||||
|  | # encoding: utf-8 | ||||||
|  | import re | ||||||
|  | import sys | ||||||
|  | from datetime import datetime | ||||||
|  |  | ||||||
class GenericParser(object):
    """Iterate over the "serious" entries of a line-oriented log.

    Subclasses provide:

    * ``RE_LOG_ENTRY`` -- regular expression with named groups that matches
      the first line of an entry; it must define a ``timestamp`` group.
    * ``TIMESTAMP_FORMAT`` -- ``datetime.strptime`` format of that group.
    * ``is_serious(log_entry)`` -- returns true for entries worth reporting.

    A line matching ``RE_LOG_ENTRY`` starts a new entry. Non-blank lines that
    do not match are attached to the entry before them (a stack trace, for
    example). Blank lines are ignored.
    """

    def __init__(self, fh):
        """Store ``fh``, an iterable of lines (sys.stdin, open(), gzip.open(), ...)."""
        self.fh = fh

    def __iter__(self):
        """Yield one row per serious entry.

        A row is the tuple ``(timestamp, log_entry, stack_trace, byte_count,
        line_count, event_count)``:

        * ``timestamp`` -- ``datetime`` parsed from the ``timestamp`` group
        * ``log_entry`` -- dict of the named groups of ``RE_LOG_ENTRY``
        * ``stack_trace`` -- the continuation lines of the entry
        * ``byte_count`` / ``line_count`` -- summed ``len()`` and number of
          the lines read so far, including the line that ended the entry
        * ``event_count`` -- number of rows yielded so far, this one included

        Raises whatever ``datetime.strptime`` raises when the timestamp of a
        serious entry does not fit ``TIMESTAMP_FORMAT``.
        """
        entry_pattern = re.compile(self.RE_LOG_ENTRY)  # compiled once, not per line
        entry_lines = []  # first line of the current entry plus its continuation lines
        log_entry = None
        byte_count = 0
        line_count = 0
        event_count = 0
        for line in self.fh:
            byte_count += len(line)
            line_count += 1

            if not line.strip():  # skip blank lines
                continue
            m = entry_pattern.match(line)
            if m is None:
                entry_lines.append(line)
                continue
            # A new entry begins, so the previous one is complete now
            if log_entry and self.is_serious(log_entry):
                event_count += 1
                yield self._make_row(log_entry, entry_lines, byte_count, line_count, event_count)
            entry_lines = [line]
            log_entry = m.groupdict()

        # The last entry has no successor to trigger it; emit it here so it
        # is not silently dropped at end of input.
        if log_entry and self.is_serious(log_entry):
            event_count += 1
            yield self._make_row(log_entry, entry_lines, byte_count, line_count, event_count)

    def _make_row(self, log_entry, entry_lines, byte_count, line_count, event_count):
        """Build the tuple yielded by ``__iter__`` for one complete entry."""
        # Everything after the first newline, i.e. the entry minus its first line
        stack_trace = "".join(entry_lines).partition("\n")[2]
        timestamp = datetime.strptime(log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return timestamp, log_entry, stack_trace, byte_count, line_count, event_count
|  |  | ||||||
class JavaLogParser(GenericParser):
    """Parser for Java application logs that reports entries with severity ERROR."""

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
    # Raw string: \[ and \] are regex escapes, not string escapes, so they must
    # reach the re module untouched (in a plain literal they are invalid
    # escape sequences).
    # NOTE(review): the literal "1" before "---" is presumably the process ID
    # of the logging application -- confirm against the real log layout.
    RE_LOG_ENTRY = r"(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+) +: (?P<message>.+)"

    def is_serious(self, log_entry):
        """Return True when the entry was logged with severity ERROR."""
        return log_entry.get("severity") == "ERROR"
|  |  | ||||||
class ApacheLogParser(GenericParser):
    """Parser for Apache access logs that reports requests with status 400 or higher."""

    # Raw string pieces so regex escapes are not treated as string escapes.
    # The UTC offset accepts both signs ("+0200" and "-0500"), and the protocol
    # version accepts any "digit.digit" form, so HTTP/2.0 lines match as well.
    # NOTE(review): the size group requires digits, so lines logging "-" as
    # the response size do not match and end up attached to the previous entry.
    RE_LOG_ENTRY = (
        r'(?P<remote_addr>.+?) - (?P<username>.+?) '
        r'\[(?P<timestamp>.+?) [+-]\d{4}\] '
        r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/\d\.\d" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<user_agent>.+?)"'
    )
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True when the HTTP status code of the entry is 400 or above."""
        return int(log_entry.get("status")) >= 400
							
								
								
									
										129
									
								
								raport/main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								raport/main.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,129 @@ | |||||||
|  | #!/usr/bin/python | ||||||
|  | # encoding: utf-8 | ||||||
|  |  | ||||||
|  | """ | ||||||
|  | Sõltuvuste paigaldamiseks: | ||||||
|  |  | ||||||
|  |   apt install -y python-jinja2 python-lxml python-pygal | ||||||
|  |   dnf install -y python-jinja2 python-lxml python-pygal | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import GeoIP | ||||||
|  | import re | ||||||
|  | import sys | ||||||
|  | from collections import Counter | ||||||
|  | from datetime import datetime | ||||||
|  | from log_parsers import ApacheLogParser | ||||||
|  |  | ||||||
then = datetime.now()  # start time; handed to the template together with all other globals
hits_per_remote_addr = Counter()  # client IP addresses the requests came from
hits_per_path = Counter()  # request paths that were visited
hits_per_user_agent = Counter()  # user agent strings sent with the requests
hits_per_country = Counter()  # country codes resolved from the client addresses
hits_per_date = Counter()  # number of log entries per calendar date
bytes_per_date = Counter()  # summed response sizes per calendar date

gi = GeoIP.open("/usr/share/GeoIP/GeoIP.dat", GeoIP.GEOIP_MEMORY_CACHE)

print("Loen standardsisendist...")
for timestamp, log_entry, stack_trace, byte_count, line_count, event_count in ApacheLogParser(sys.stdin):
    country_code = gi.country_code_by_addr(log_entry.get("remote_addr"))
    hits_per_path[log_entry.get("path")] += 1
    hits_per_remote_addr[log_entry.get("remote_addr")] += 1
    hits_per_user_agent[log_entry.get("user_agent")] += 1
    hits_per_country[country_code] += 1
    hits_per_date[timestamp.date()] += 1
    bytes_per_date[timestamp.date()] += int(log_entry.get("size"))

# Without a single entry there is nothing to report: min()/max() below would
# raise ValueError and the template would miss the loop variables it uses.
if not hits_per_date:
    sys.exit("Standardsisendist ei leitud ühtegi logikirjet, raportit ei koostata")

# First and last date on which entries occurred
first_date, last_date = min(hits_per_date), max(hits_per_date)

####################################
### Colour the countries on a map ###
####################################

import requests
from lxml import etree
from numpy import interp

# Download the blank world map from Wikimedia
print("Laadin alla kaarti...")
response = requests.get("https://upload.wikimedia.org/wikipedia/commons/0/03/BlankMap-World6.svg")
response.raise_for_status()  # do not try to parse an HTTP error page as SVG
buf = response.content

# Parse the XML tree
map_document = etree.fromstring(buf)
max_country_hits = max(hits_per_country.values())  # same for every country, so computed once
for country, count in hits_per_country.items():
    if not country:
        # Some addresses do not resolve to a country code (internal networks etc.)
        continue
    # Map the hit count onto a hue: 180 for no hits down to 0 for the busiest country
    hue = interp(count, [0, max_country_hits], [180, 0])
    for element in map_document.xpath("//*[@id='%s']" % country.lower()):
        element.set("style", "fill:hsl(%.2f, 60%%, 60%%)" % hue)
        for subelement in element:
            # presumably so that class-based styling of child shapes does not
            # override the fill set on the parent -- TODO confirm
            subelement.attrib.pop("class", "")

with open("top.svg", "wb") as fh:
    fh.write(etree.tostring(map_document))
print("Kaart salvestatud faili top.svg")


#################################################
### Plot request count and traffic per date   ###
#################################################

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(10, 7))

# Two charts inside one figure
sub = fig.add_subplot(2, 1, 1)
sub2 = fig.add_subplot(2, 1, 2)

sub.set_xlabel(u"Päringute arv")
sub2.set_xlabel(u"Liikluse maht baitides")

# Plot the data points; list() gives matplotlib real sequences on Python 3 too
sub.barh(list(hits_per_date.keys()), list(hits_per_date.values()))
sub2.barh(list(bytes_per_date.keys()), list(bytes_per_date.values()))

# Save to files
fig.savefig("bar.svg", format="svg")
fig.savefig("bar.png")


################################
### Build the browser chart  ###
################################

import pygal
line_chart = pygal.Pie(
    truncate_legend=50,  # legend entries up to 50 characters
    width=1000, height=300,  # chart width/height in pixels
    style=pygal.style.Style(background='transparent'))  # no background colour
line_chart.config(style_name='LightStyle', fill=None)
line_chart.title = 'Veebilehitsejate osakaal'
for user_agent, count in hits_per_user_agent.most_common(7):
    line_chart.add(user_agent, count)  # add a slice to the chart
    hits_per_user_agent.pop(user_agent)  # remove it so only the remainder is left
line_chart.add("Muud veebilehitsejad", sum(hits_per_user_agent.values()))

user_agent_chart = line_chart.render(is_unicode=True, disable_xml_declaration=True)


from jinja2 import Template
import codecs

# Read the Jinja template from a UTF-8 text file
with codecs.open("template.html", "rb", encoding="utf-8") as fh:
    template = Template(fh.read())

# Hand every module-level variable to the template (first_date, last_date, the counters etc.)
buf = template.render(locals())

# Save the rendered page in UTF-8; opened only after the template file has been closed
with codecs.open("raport.html", "wb", encoding="utf-8") as fh:
    fh.write(buf)

print("HTML kujul raport salvestatud faili raport.html")
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										103
									
								
								raport/template.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								raport/template.html
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | |||||||
|  | <!DOCTYPE html> | ||||||
|  | <html> | ||||||
|  |   <head> | ||||||
|  |     <title>Apache logide raport</title> | ||||||
|  |     <meta charset="utf-8"/> | ||||||
|  |     <link href="https://getbootstrap.com/docs/4.1/dist/css/bootstrap.min.css" rel="stylesheet"/> | ||||||
|  |     <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" crossorigin="anonymous"></script> | ||||||
|  |     <script src="https://getbootstrap.com/docs/4.1/dist/js/bootstrap.min.js"></script> | ||||||
|  |     <script type="text/javascript" src="http://kozea.github.com/pygal.js/latest/pygal-tooltips.min.js"></script> | ||||||
|  |     <style> | ||||||
|  |     svg, img { | ||||||
|  |         max-width: 80%; | ||||||
|  |     } | ||||||
|  |     </style> | ||||||
|  |   </head> | ||||||
|  |   <body> | ||||||
|  |     <!-- Navigeerimise lingid, lehe sees --> | ||||||
|  |     <nav class="navbar navbar-expand-lg navbar-light bg-light fixed-top"> | ||||||
|  |       <a class="navbar-brand" href="#">Apache2 logid</a> | ||||||
|  |       <div class="collapse navbar-collapse" id="navbarSupportedContent"> | ||||||
|  |         <ul class="navbar-nav mr-auto"> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#home">Üles</a></li> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#per_date">Liiklus</a></li> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#user_agents">Veebilehitsejad</a></li> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#urls">URL-id</a></li> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#remote_addrs">IP-aadressid</a></li> | ||||||
|  |           <li class="nav-item"><a class="nav-link" href="#countries">Riigid</a></li> | ||||||
|  |         </ul> | ||||||
|  |       </div> | ||||||
|  |     </nav> | ||||||
|  |       <div class="container"> | ||||||
|  |         <br/> | ||||||
|  |         <br/> | ||||||
|  |         <br/> | ||||||
|  |         <div class="row mt-3" id="home"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <h2>Sisendandmed</h2> | ||||||
|  |             <p> | ||||||
|  |             Läbi näritud {{ byte_count | filesizeformat }} andmeid, | ||||||
|  |             {{ line_count }} rida, {{ event_count }} logikirjet. | ||||||
|  |             Logikirjed | ||||||
|  |             {% if first_date == last_date %} | ||||||
|  |                 päeval {{ first_date }} | ||||||
|  |             {% else %} | ||||||
|  |               {{ first_date }} kuni {{ last_date }} | ||||||
|  |             {% endif %} | ||||||
|  |             </p> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row mt-3" id="per_date"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <h2>Kuupäevad</h2> | ||||||
|  |             <p>Päringute arv ning päringute maht kuupäevade kaupa grupeeritult, | ||||||
|  |             ploteeritud <a href="https://matplotlib.org/" target="_blank">matplotlib</a> abil</p> | ||||||
|  |             <img src="bar.svg"/> | ||||||
|  |             <h2 id="user_agents">Top 7 veebilehitsejad</h2> | ||||||
|  |             <p>Interaktiivne graafik ploteeritud <a href="http://pygal.org/en/stable/" target="_blank">pygal</a> abil</p> | ||||||
|  |             <div> | ||||||
|  |               {{ user_agent_chart }} | ||||||
|  |             </div> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row mt-3" id="urls"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <h2>Top 10 URL-id</h2> | ||||||
|  |             <div> | ||||||
|  |               <ol> | ||||||
|  |                 {% for path, count in hits_per_path.most_common(10) %} | ||||||
|  |                   <li><a href="http://enos.itcollege.ee{{ path }}" target="_blank">{{ path }}</a>: {{ count }}</li> | ||||||
|  |                 {% endfor %} | ||||||
|  |               </ol> | ||||||
|  |             </div> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row mt-3" id="remote_addrs"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <h2>Top10 IP aadressid</h2> | ||||||
|  |             <div> | ||||||
|  |               <ol> | ||||||
|  |                 {% for remote_addr, count in hits_per_remote_addr.most_common(10) %} | ||||||
|  |                   <li><a href="http://geoiplookup.net/ip/{{ remote_addr }}" target="_blank">{{ remote_addr }}</a>: {{ count }}</li> | ||||||
|  |                 {% endfor %} | ||||||
|  |               </ol> | ||||||
|  |             </div> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row mt-3" id="countries"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <h2>Külastused riikide kaupa</h2> | ||||||
|  |             <p>Loetud XML failist, värvitud lxml mooduli abil ning salvestatud top.svg faili sisse:</p> | ||||||
|  |             <div> | ||||||
|  |                 <img src="top.svg"/> | ||||||
|  |             </div> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |         <div class="row mt-3" id="generated"> | ||||||
|  |           <div class="col-md-12"> | ||||||
|  |             <p>Fail genereeriti {{ datetime.now() }}, võttis aega {{ datetime.now() - then }}</p> | ||||||
|  |           </div> | ||||||
|  |         </div> | ||||||
|  |       </div> | ||||||
|  |   </body> | ||||||
|  | </html> | ||||||
		Reference in New Issue
	
	Block a user