Apache parsija täielik näidis
This commit is contained in:
		
							
								
								
									
										34
									
								
								raport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								raport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | ||||
| #!/usr/bin/python | ||||
| # encoding: utf-8 | ||||
|  | ||||
| """ | ||||
| Parsi standardsisendist Apache logikirjed ja kuva edetabelid | ||||
| Käivitamiseks: (cat access.log; zcat access.log.1.gz) | python raport.py | ||||
| """ | ||||
|  | ||||
| import re | ||||
| import sys | ||||
| from collections import Counter | ||||
| from log_parsers import ApacheLogParser | ||||
|  | ||||
| hits = Counter() # IP-d kust tuldi | ||||
| urls = Counter() # URL-id mida külastati | ||||
| agents = Counter() # User agent mida kasutati külastamisel | ||||
|  | ||||
| for timestamp, log_entry, stack_trace in ApacheLogParser(sys.stdin): | ||||
|     urls[log_entry.get("path")] += 1 | ||||
|     hits[log_entry.get("remote_addr")] += 1 | ||||
|     agents[log_entry.get("user_agent")] += 1 | ||||
|  | ||||
| print "Top5 külastatud URL-id veebiserveris:" | ||||
| for path, count in urls.most_common(5): | ||||
|     sys.stdout.write("% 9d %s\n" % (count, path)) | ||||
| print | ||||
| print "Top5 külastusi teinud IP aadressid:" | ||||
| for remote_addr, count in hits.most_common(5): | ||||
|     sys.stdout.write("% 9d %s\n" % (count, remote_addr)) | ||||
| print | ||||
| print "Top5 kasutatud veebilehitsejad/OS-id:" | ||||
| for user_agent, count in agents.most_common(5): | ||||
|     sys.stdout.write("% 9d %s\n" % (count, user_agent)) | ||||
|  | ||||
							
								
								
									
										7
									
								
								raport/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								raport/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| # Apache logide parsija | ||||
|  | ||||
| Komplektne näide, kuidas Apache2 logisid parsida ning raporteerida huvitavamad killud: | ||||
|  | ||||
| * Parsib Apache logifaili kirjed ApacheLogParser klassi abil, mis on kirjeldatud failis log_parsers.py | ||||
| * Värvib kaardi faili BlankMap-World6.svg ning salvestab top.svg faili sisse | ||||
| * Genereerib Bootstrap baasil koostatud veebilehe mallist template.html faili raport.html | ||||
							
								
								
									
										49
									
								
								raport/log_parsers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								raport/log_parsers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| # encoding: utf-8 | ||||
| import re | ||||
| import sys | ||||
| from datetime import datetime | ||||
|  | ||||
class GenericParser(object):
    """Iterate over the "serious" entries of a possibly multi-line log stream.

    Subclasses must provide:

    * ``RE_LOG_ENTRY`` - regular expression with named groups (including
      ``timestamp``) that matches the first line of a log entry;
    * ``TIMESTAMP_FORMAT`` - ``strptime`` format of the ``timestamp`` group;
    * ``is_serious(log_entry)`` - predicate selecting the entries to report.

    Lines that do not match ``RE_LOG_ENTRY`` are treated as continuation
    lines (e.g. a stack trace) of the most recent matching line.
    """

    def __init__(self, fh):
        """Store *fh*, any iterable of lines (sys.stdin, open(), gzip.open(), ...)."""
        self.fh = fh

    def __iter__(self):
        """Yield one tuple per serious log entry.

        Each tuple is ``(timestamp, log_entry, stack_trace, byte_count,
        line_count, event_count)`` where ``timestamp`` is a ``datetime``,
        ``log_entry`` is the dict of named regex groups, ``stack_trace`` holds
        the continuation lines of the entry, ``byte_count`` / ``line_count``
        are running totals of ``len(line)`` / lines read so far and
        ``event_count`` is the number of entries yielded so far.
        """
        pattern = re.compile(self.RE_LOG_ENTRY)  # compile once, not per line
        multiline_message = ""
        log_entry = None
        byte_count = 0
        line_count = 0
        event_count = 0
        for line in self.fh:
            byte_count += len(line)
            line_count += 1

            if not line.strip():  # skip blank lines
                continue
            m = pattern.match(line)
            if m:
                # A new entry starts here, so the previous one is complete.
                if log_entry and self.is_serious(log_entry):
                    event_count += 1
                    yield self._build_row(
                        log_entry, multiline_message,
                        byte_count, line_count, event_count)
                multiline_message = line
                log_entry = m.groupdict()
            else:
                multiline_message += line

        # The last entry has no following line to trigger its emission, so it
        # must be flushed once the input is exhausted.
        if log_entry and self.is_serious(log_entry):
            event_count += 1
            yield self._build_row(
                log_entry, multiline_message,
                byte_count, line_count, event_count)

    def _build_row(self, log_entry, multiline_message,
                   byte_count, line_count, event_count):
        """Assemble the tuple yielded by ``__iter__`` for one finished entry."""
        # Everything after the first line of the message is the stack trace.
        stack_trace = "\n".join(multiline_message.split("\n")[1:])
        timestamp = datetime.strptime(
            log_entry.get("timestamp"), self.TIMESTAMP_FORMAT)
        return (timestamp, log_entry, stack_trace,
                byte_count, line_count, event_count)
|  | ||||
class JavaLogParser(GenericParser):
    """Parser for Java application logs; reports only ERROR entries."""

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
    # Raw string: "\[" and "\]" are regex escapes, not valid string escapes.
    # The literal "1" after the severity is matched verbatim.
    RE_LOG_ENTRY = r"(?P<timestamp>.+?) (?P<severity>[A-Z]+) 1 --- \[(?P<thread>.+)\](?P<class>.+) +: (?P<message>.+)"

    def is_serious(self, log_entry):
        """Return True when the entry's severity is exactly "ERROR"."""
        return log_entry.get("severity") == "ERROR"
|  | ||||
class ApacheLogParser(GenericParser):
    """Parser for Apache access logs; reports entries with status >= 400."""

    # Raw string pieces, concatenated by the compiler into one pattern.
    # Compared to a plain "\+\d\d\d\d" / "HTTP/1.[01]" pattern this accepts
    # negative timezone offsets ("-0500") and matches the dot in the protocol
    # version literally.
    # NOTE(review): "size" must be digits, so lines logging "-" as the size
    # do not match and are treated as continuation lines - confirm whether
    # that is intended before widening the group (callers int() the value).
    RE_LOG_ENTRY = (
        r'(?P<remote_addr>.+?) - (?P<username>.+?) '
        r'\[(?P<timestamp>.+?) [+-]\d{4}\] '
        r'"(?P<verb>[A-Z]+) (?P<path>.+) HTTP/1\.[01]" '
        r'(?P<status>\d+) (?P<size>\d+) '
        r'"(?P<referrer>.+?)" "(?P<user_agent>.+?)"'
    )
    # The timezone offset is matched outside the "timestamp" group, so the
    # parsed datetime is naive.
    TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S"

    def is_serious(self, log_entry):
        """Return True for client and server errors (HTTP status >= 400)."""
        return int(log_entry.get("status")) >= 400
							
								
								
									
										129
									
								
								raport/main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								raport/main.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,129 @@ | ||||
| #!/usr/bin/python | ||||
| # encoding: utf-8 | ||||
|  | ||||
| """ | ||||
| Sõltuvuste paigaldamiseks: | ||||
|  | ||||
|   apt install -y python-jinja2 python-lxml python-pygal | ||||
|   dnf install -y python-jinja2 python-lxml python-pygal | ||||
| """ | ||||
|  | ||||
| import GeoIP | ||||
| import re | ||||
| import sys | ||||
| from collections import Counter | ||||
| from datetime import datetime | ||||
| from log_parsers import ApacheLogParser | ||||
|  | ||||
| then = datetime.now() | ||||
| hits_per_remote_addr = Counter() # IP-d kust tuldi | ||||
| hits_per_path = Counter() # URL-id mida külastati | ||||
| hits_per_user_agent = Counter() # User agent mida kasutati külastamisel | ||||
| hits_per_country = Counter() # Riigid kust päringud tulid | ||||
| hits_per_date = Counter() # Kuupäevad mil logikirjeid oli | ||||
| bytes_per_date = Counter() | ||||
|  | ||||
| gi = GeoIP.open("/usr/share/GeoIP/GeoIP.dat", GeoIP.GEOIP_MEMORY_CACHE) | ||||
|  | ||||
| print "Loen standardsisendist..." | ||||
| for timestamp, log_entry, stack_trace, byte_count, line_count, event_count in ApacheLogParser(sys.stdin): | ||||
|     country_code = gi.country_code_by_addr(log_entry.get("remote_addr")) | ||||
|     hits_per_path[log_entry.get("path")] += 1 | ||||
|     hits_per_remote_addr[log_entry.get("remote_addr")] += 1 | ||||
|     hits_per_user_agent[log_entry.get("user_agent")] += 1 | ||||
|     hits_per_country[country_code] += 1 | ||||
|     hits_per_date[timestamp.date()] += 1 | ||||
|     bytes_per_date[timestamp.date()] += int(log_entry.get("size")) | ||||
|  | ||||
| # Leia kõige esimene kuupäev ning viimane kuupäev millal sündmused esinesid | ||||
| first_date, last_date = min(hits_per_date.keys()), max(hits_per_date.keys()) | ||||
|  | ||||
| ############################ | ||||
| ### Värvi riigid kaardil ### | ||||
| ############################ | ||||
|  | ||||
| import requests | ||||
| from lxml import etree | ||||
| from numpy import interp | ||||
|  | ||||
| # Laadi alla kaart wikimedia veebist | ||||
| print "Laadin alla kaarti..." | ||||
| buf = requests.get("https://upload.wikimedia.org/wikipedia/commons/0/03/BlankMap-World6.svg").content | ||||
|  | ||||
| # Parsi XML puu | ||||
| map_document = etree.fromstring(buf) | ||||
| for country, count in hits_per_country.items(): | ||||
|     if not country: | ||||
|         # Mõni IP ei pruukinud laheneda riigikoodiks (sisevõrk jms) | ||||
|         continue | ||||
|     hue = interp(count, [0, max(hits_per_country.values())], [180, 0]) | ||||
|     for element in map_document.xpath("//*[@id='%s']" % country.lower()): | ||||
|         element.set("style", "fill:hsl(%.2f, 60%%, 60%%)" % hue) | ||||
|         for subelement in element: | ||||
|             subelement.attrib.pop("class", "") | ||||
|  | ||||
| with open("top.svg", "wb") as fh: | ||||
|     fh.write(etree.tostring(map_document)) | ||||
| print "Kaart salvestatud faili top.svg" | ||||
|  | ||||
|  | ||||
| #################################################### | ||||
| ### Koosta päringute arvu graafik päevade lõikes ### | ||||
| #################################################### | ||||
|  | ||||
| import matplotlib.pyplot as plt | ||||
fig = plt.figure(figsize=(10, 7))

# Two charts stacked in one figure: request counts on top, traffic volume below
sub = fig.add_subplot(2, 1, 1)
sub2 = fig.add_subplot(2, 1, 2)

# Label each chart and plot one horizontal bar per date
for axes, label, per_date in (
        (sub, u"Päringute arv", hits_per_date),
        (sub2, u"Liikluse maht baitides", bytes_per_date)):
    axes.set_xlabel(label)
    axes.barh(per_date.keys(), per_date.values())

# Save the figure both as SVG and as PNG
for target, options in (("bar.svg", {"format": "svg"}), ("bar.png", {})):
    fig.savefig(target, **options)
|  | ||||
|  | ||||
| ####################################### | ||||
| ### Koosta veebilehitsejate graafik ### | ||||
| ####################################### | ||||
|  | ||||
| import pygal | ||||
# Pie chart: the seven most common user agents plus one slice for all the rest
chart_style = pygal.style.Style(background='transparent')  # no background colour
pie_chart = pygal.Pie(
    truncate_legend=50,      # legend entries limited to 50 characters
    width=1000, height=300,  # chart size in pixels
    style=chart_style)
pie_chart.config(style_name='LightStyle', fill=None)
pie_chart.title = 'Veebilehitsejate osakaal'

for user_agent, count in hits_per_user_agent.most_common(7):
    pie_chart.add(user_agent, count)
    # Remove the plotted agent so that only the remainder is summed below.
    del hits_per_user_agent[user_agent]
pie_chart.add("Muud veebilehitsejad", sum(hits_per_user_agent.values()))

# Rendered chart markup that the report template embeds.
user_agent_chart = pie_chart.render(is_unicode=True, disable_xml_declaration=True)
|  | ||||
|  | ||||
|  | ||||
| from jinja2 import Template | ||||
| import codecs | ||||
|  | ||||
| # Loe jinja mall UTF-8 tekstifailist | ||||
| with codecs.open("template.html", "rb", encoding="utf-8") as fh: | ||||
|     template = Template(fh.read()) | ||||
|     # Süsti malli sisse kõik kohalikud muutujad (first_date, last_date, hits, urls jne) | ||||
|     buf = template.render(locals()) | ||||
|  | ||||
|     # Salvesta täidetud leht UTF-8 kodeeringus | ||||
|     with codecs.open("raport.html", "wb", encoding="utf-8") as fh: | ||||
|         fh.write(buf) | ||||
|  | ||||
| print "HTML kujul raport savlestatud faili raport.html" | ||||
|  | ||||
|  | ||||
							
								
								
									
										103
									
								
								raport/template.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								raport/template.html
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| <!DOCTYPE html> | ||||
| <html> | ||||
|   <head> | ||||
|     <title>Apache logide raport</title> | ||||
|     <meta charset="utf-8"/> | ||||
|     <link href="https://getbootstrap.com/docs/4.1/dist/css/bootstrap.min.css" rel="stylesheet"/> | ||||
|     <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" crossorigin="anonymous"></script> | ||||
|     <script src="https://getbootstrap.com/docs/4.1/dist/js/bootstrap.min.js"></script> | ||||
|     <script type="text/javascript" src="http://kozea.github.com/pygal.js/latest/pygal-tooltips.min.js"></script> | ||||
|     <style> | ||||
|     svg, img { | ||||
|         max-width: 80%; | ||||
|     } | ||||
|     </style> | ||||
|   </head> | ||||
|   <body> | ||||
|     <!-- Navigeerimise lingid, lehe sees --> | ||||
|     <nav class="navbar navbar-expand-lg navbar-light bg-light fixed-top"> | ||||
|       <a class="navbar-brand" href="#">Apache2 logid</a> | ||||
|       <div class="collapse navbar-collapse" id="navbarSupportedContent"> | ||||
|         <ul class="navbar-nav mr-auto"> | ||||
|           <li class="nav-item"><a class="nav-link" href="#home">Üles</a></li> | ||||
|           <li class="nav-item"><a class="nav-link" href="#per_date">Liiklus</a></li> | ||||
|           <li class="nav-item"><a class="nav-link" href="#user_agents">Veebilehitsejad</a></li> | ||||
|           <li class="nav-item"><a class="nav-link" href="#urls">URL-id</a></li> | ||||
|           <li class="nav-item"><a class="nav-link" href="#remote_addrs">IP-aadressid</a></li> | ||||
|           <li class="nav-item"><a class="nav-link" href="#countries">Riigid</a></li> | ||||
|         </ul> | ||||
|       </div> | ||||
|     </nav> | ||||
|       <div class="container"> | ||||
|         <br/> | ||||
|         <br/> | ||||
|         <br/> | ||||
|         <div class="row mt-3" id="home"> | ||||
|           <div class="col-md-12"> | ||||
|             <h2>Sisendandmed</h2> | ||||
|             <p> | ||||
|             Läbi näritud {{ byte_count | filesizeformat }} andmeid, | ||||
|             {{ line_count }} rida, {{ event_count }} logikirjet. | ||||
|             Logikirjed | ||||
|             {% if first_date == last_date %} | ||||
|                 päeval {{ first_date }} | ||||
|             {% else %} | ||||
|               {{ first_date }} kuni {{ last_date }} | ||||
|             {% endif %} | ||||
|             </p> | ||||
|           </div> | ||||
|         </div> | ||||
|         <div class="row mt-3" id="per_date"> | ||||
|           <div class="col-md-12"> | ||||
|             <h2>Kuupäevad</h2> | ||||
|             <p>Päringute arv ning päringute maht kuupäevade kaupa grupeeritult, | ||||
|             ploteeritud <a href="https://matplotlib.org/" target="_blank">matplotlib</a> abil</p> | ||||
|             <img src="bar.svg"/> | ||||
|             <h2 id="user_agents">Top 10 veebilehitsejad</h2> | ||||
|             <p>Interaktiivne graafik ploteeritud <a href="http://pygal.org/en/stable/" target="_blank">pygal</a> abil</p> | ||||
|             <div> | ||||
|               {{ user_agent_chart }} | ||||
|             </div> | ||||
|           </div> | ||||
|         </div> | ||||
|         <div class="row mt-3" id="urls"> | ||||
|           <div class="col-md-12"> | ||||
|             <h2>Top 10 URL-id</h2> | ||||
|             <div> | ||||
|               <ol> | ||||
|                 {% for path, count in hits_per_path.most_common(10) %} | ||||
|                   <li><a href="http://enos.itcollege.ee{{ path }}" target="_blank">{{ path }}</a>: {{ count }}</li> | ||||
|                 {% endfor %} | ||||
|               </ol> | ||||
|             </div> | ||||
|           </div> | ||||
|         </div> | ||||
|         <div class="row mt-3" id="remote_addrs"> | ||||
|           <div class="col-md-12"> | ||||
|             <h2>Top10 IP aadressid</h2> | ||||
|             <div> | ||||
|               <ol> | ||||
|                 {% for remote_addr, count in hits_per_remote_addr.most_common(10) %} | ||||
|                   <li><a href="http://geoiplookup.net/ip/{{ remote_addr }}" target="_blank">{{ remote_addr }}</a>: {{ count }}</li> | ||||
|                 {% endfor %} | ||||
|               </ol> | ||||
|             </div> | ||||
|           </div> | ||||
|         </div> | ||||
|         <div class="row mt-3" id="countries"> | ||||
|           <div class="col-md-12"> | ||||
|             <h2>Külastused riikide kaupa</h2> | ||||
|             <p>Loetud XML failist, värvitud lxml mooduli abil ning salvestatud top.svg faili sisse:</p> | ||||
|             <div> | ||||
|                 <img src="top.svg"/> | ||||
|             </div> | ||||
|           </div> | ||||
|         </div> | ||||
|         <div class="row mt-3" id="generated"> | ||||
|           <div class="col-md-12"> | ||||
|             <p>Fail genereeriti {{ datetime.now() }}, võttis aega {{ datetime.now() - then }}</p> | ||||
|           </div> | ||||
|         </div> | ||||
|       </div> | ||||
|   </body> | ||||
| </html> | ||||
		Reference in New Issue
	
	Block a user