#!/usr/local/bin/python3 -OO import argparse import asyncio import collections import os import re import socket import ujson import pymongo from asyncinotify import Inotify, Mask from datetime import datetime from math import inf from motor.motor_asyncio import AsyncIOMotorClient from prometheus_client import Counter, Gauge, Histogram from prometheus_client.exposition import generate_latest from pymongo.errors import CollectionInvalid from sanic import Sanic, text from time import time """ To install dependencies: pip3 install ujson pymongo motor asyncinotify prometheus_client sanic """ parser = argparse.ArgumentParser(description="Log shipper", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--dry-run", action="store_true", help="Do not insert anything into database") parser.add_argument("--namespace", type=str, help="Namespace to monitor, all by default") parser.add_argument("--exclude-pod-prefixes", nargs="*", type=str, default=["logmower-"], help="Pod prefixes to exclude") parser.add_argument("--max-upload-queue-size", type=int, default=10000, help="Max upload queue size in records") parser.add_argument("--max-connection-pool-size", type=int, default=1, help="Max MongoDB connection pool size") parser.add_argument("--max-record-size", type=int, default=128 * 1024, help="Max record size in bytes, 128k by default") parser.add_argument("--max-record-retention", type=int, help="Record retention in seconds, never by default") parser.add_argument("--max-collection-size", type=int, help="MongoDB collection size limit in bytes, by default disabled") parser.add_argument("--bulk-insertion-size", type=int, default=1000, help="MongoDB bulk insertion size in records") parser.add_argument("--heuristic-parse-json", action="store_true", help="Attempt automatically unwrapping JSON records") parser.add_argument("--heuristic-normalize-log-level", action="store_true", help="Normalize log.level values to Syslog defined keywords") args = parser.parse_args() ROOT = "/var/log/containers" app = Sanic("tail") tasks = dict() with open("/etc/machine-id") as fh: machine_id = fh.read().strip() host_info = { "id": machine_id, "architecture": os.uname().machine, "name": os.environ.get("NODE_NAME", socket.getfqdn()) } log_files = dict() gauge_buffer_size = Gauge( "logmower_buffer_size_bytes", "Log files buffered in memory") gauge_backlog_size = Gauge( "logmower_backlog_size_bytes", "Content that is yet to be submitted") gauge_log_files = Gauge( "logmower_log_file_count", "Number of tracked log files", ["state"]) gauge_queue_entries = Gauge( "logmower_queue_record_count", "Records queued for submission") counter_unexpected_filenames = Counter( "logmower_invalid_filename_count", "Count of unexpected filenames in logs directory") counter_inotify_events = Counter( "logmower_inotify_event_count", "Count of inotify events", ["mask"]) counter_skipped_bytes = Counter( "logmower_skipped_bytes", "Bytes that were skipped during startup due to being already present in data store") counter_dropped_lines = Counter( "logmower_dropped_lines", "Lines dropped due to being part of too long record") counter_heuristic_failures = Counter( "logmower_heuristic_failed_record_count", "Heuristic failures", ["mode"]) counter_records = Counter( "logmower_record_count", "Record count", ["stage"]) counter_insertion_errors = Counter( "logmower_insertion_error_count", "Exceptions caught during insertion of single event", ["exception"]) counter_bulk_insertion_errors = Counter( "logmower_bulk_insertion_error_count", "Exceptions caught during bulk insertions", ["exception"]) counter_bulk_insertions = Counter( "logmower_bulk_insertion_count", "Count of bulk insertions to database", ["status"]) histogram_bulk_submission_size = Histogram( "logmower_bulk_submission_message_count", "Bulk submission message count", buckets=(1, 5, 10, 50, 100, 500, 1000, 5000, 10000)) histogram_database_operation_latency = Histogram( "logmower_database_operation_latency", "Database operation latency", ["operation"], buckets=(0.1, 0.2, 0.5, 1, 5, 10, 50)) histogram_bulk_submission_latency = Histogram( "logmower_bulk_submission_latency", "Bulk submission latency", buckets=(0.1, 0.2, 0.5, 1, 5, 10, 50)) histogram_line_size = Histogram( "logmower_line_size_bytes", "Log file line size in sizes", buckets=(80, 160, 320, 640, 1280, inf)) NORMALIZED_LOG_LEVELS = { # Syslog level emergency (0), should not be used by applications "emerg": "emergency", "panic": "emergency", # Syslog level alert (1) "a": "alert", # Syslog level critical (2), likely results in program exit "crit": "critical", "fatal": "critical", "f": "critical", # Syslog level error (3) "err": "error", "e": "error", # Syslog level warning (4) "warn": "warning", "w": "warning", # Following log levels should not be enabled by default # Syslog level notice (5) "n": "notice", # Syslog level informational (6) "informational": "info", "i": "info", # Syslog level debug (7) "d": "debug", "d1": "debug", "d2": "debug", "d3": "debug", "d4": "debug", "d5": "debug", "trace": "debug", } async def uploader(coll, queue): then = time() kwargs = {} if args.max_record_retention: kwargs["expireAfterSeconds"] = args.max_record_retention await coll.create_index([("@timestamp", 1)], **kwargs) # Following index is used to look up where to resume submitting logs # after restart/crash await coll.create_index([("log.file.path", 1), ("log.offset", 1)], unique=True) # Indexes used for frequent searches await coll.create_index([("host.name", 1)]) await coll.create_index([("kubernetes.pod.name", 1)], sparse=True) await coll.create_index([("kubernetes.namespace", 1), ("kubernetes.pod.name", 1), ("kubernetes.container.name", 1)], sparse=True) histogram_database_operation_latency.labels("create-index").observe(time() - then) messages = [] while True: while len(messages) < args.bulk_insertion_size: try: o = await asyncio.wait_for(queue.get(), timeout=0.1) except asyncio.exceptions.TimeoutError: break else: gauge_queue_entries.set(queue.qsize()) o["event"]["ingested"] = datetime.utcnow() messages.append(o) if not messages: continue try: then = time() await coll.insert_many(messages) histogram_database_operation_latency.labels("insert-many").observe(time() - then) except pymongo.errors.ServerSelectionTimeoutError: counter_bulk_insertions.labels("timed-out").inc() continue except pymongo.errors.NotPrimaryError: counter_bulk_insertions.labels("not-primary").inc() continue except pymongo.errors.BulkWriteError as e: counter_bulk_insertions.labels("retried-as-singles").inc() j = "%s.%s" % (e.__class__.__module__, e.__class__.__name__) counter_bulk_insertion_errors.labels(j).inc() print("Bulk insert failed: %s" % j) for o in messages: o.pop("_id", None) o["event"]["ingested"] = datetime.utcnow() try: then = time() await coll.insert_one(o) histogram_database_operation_latency.labels("insert-one").observe(time() - then) except Exception as e: j = "%s.%s" % (e.__class__.__module__, e.__class__.__name__) counter_insertion_errors.labels(j).inc() counter_records.labels("dropped").inc() print("Failed to insert (%s): %s" % (j, o)) else: counter_records.labels("commited").inc() else: counter_bulk_insertions.labels("successful").inc() histogram_bulk_submission_size.observe(len(messages)) counter_records.labels("commited").inc(len(messages)) messages = [] class FileTailer(object): def __init__(self, path, offset=0, finished=False): self.head = offset self.tail = offset + 1 self.offset = offset self.path = path self.buf = b"" self.finished = finished self.caughtup = False self.more_content = asyncio.Event() async def __aiter__(self): with open(self.path, "rb") as fh: while True: if not self.finished and self.head >= self.tail: self.caughtup = True await self.more_content.wait() self.more_content.clear() self.tail = fh.seek(0, os.SEEK_END) if self.head >= self.tail: if self.finished: # TODO: if there is still something in buf? break continue fh.seek(self.head) chunk = fh.read(min(self.tail - self.head, 4096)) self.buf += chunk self.head += len(chunk) while True: step = self.buf.find(b"\n") if step == -1: break buf = self.buf[:step + 1] self.buf = self.buf[step + 1:] await asyncio.sleep(0) yield self.offset, len(buf), buf[:-1].decode("utf-8") self.offset += step + 1 break class LogFile(FileTailer): def __init__(self, loop, coll, queue, path, namespace_name, pod_name, container_name, start=False): FileTailer.__init__(self, path) self.queue = queue self.namespace_name = namespace_name self.pod_name = pod_name self.container_name = container_name self.coll = coll self.state = "seeking" self.loop = loop if start: self.start() def start(self): self.loop.create_task(self.handler_loop()) def poke(self): self.more_content.set() def close(self): self.done = True self.poke() async def handler_loop(self): message = "" record_size = 0 skip_next = False if not args.dry_run: then = time() last_record = await self.coll.find_one({ "host.id": host_info["id"], "log.file.path": self.path }, sort=[("log.offset", -1)]) histogram_database_operation_latency.labels("find-replay-offset").observe(time() - then) if last_record: self.head = self.offset = last_record["log"]["offset"] counter_skipped_bytes.inc(self.head) skip_next = True self.state = "replaying" record_offset = self.offset async for line_offset, line_size, line in self: self.state = "watching" if self.caughtup else "replaying" assert "\n" not in line try: reason = "unicode-encoding" if len(line) < 45: reason = "line-short" raise ValueError() if not re.match("^(.+) (stdout|stderr)( (.))? (.*)$", line): reason = "no-regex-match" raise ValueError() reason = "invalid-timestamp" event_created = datetime.strptime(line[:23], "%Y-%m-%dT%H:%M:%S.%f") except ValueError: print("Failed to parse file %s at offset %d, reason %s: %s" % (self.path, line_offset, reason, repr(line))) break histogram_line_size.observe(line_size) record_size += line_size if record_size < args.max_record_size: # TODO: Support Docker runtime on EKS message += line[45:] state = line[43] if state == "P": # This is partial message continue assert state == "F", "Unknown line state" o = {} o["message"] = message o["log"] = {} message = "" record_size = 0 if record_size > args.max_record_size: counter_records.labels("too-large").inc() # TODO: Log portion of the message continue stream = line[36:42].strip() if args.heuristic_parse_json and o["message"].startswith("{\""): # TODO: Follow Filebeat hints try: j = ujson.loads(message) except ujson.JSONDecodeError: counter_heuristic_failures.labels("invalid-json").inc() else: # Merge only if parsed JSON message looks like it's # conforming to ECS schema if "@timestamp" in j and "message" in j: o.update(j) else: o["json"] = j o["kubernetes"] = { "container": { "name": self.container_name, }, "namespace": self.namespace_name, "pod": { "name": self.pod_name } } o["log"]["file"] = { "path": self.path } o["log"]["offset"] = record_offset o["host"] = host_info o["stream"] = stream o["event"] = { "created": event_created } if args.heuristic_normalize_log_level: if "level" in o["log"]: level = o["log"]["level"].strip().lower() try: o["log"]["level"] = NORMALIZED_LOG_LEVELS[level] except KeyError: counter_heuristic_failures.labels("invalid-log-level").inc() else: o["log"]["level"] = "error" if stream == "stderr" else "info" if "@timestamp" not in o: o["@timestamp"] = o["event"]["created"] o.pop("_id", None) if not skip_next: await self.queue.put(o) gauge_queue_entries.set(self.queue.qsize()) skip_next = False record_offset = line_offset self.state = "closing" log_files.pop(self.path) async def watcher(loop, queue, coll): print("Starting watching") with Inotify() as inotify: def add_file(path, done=False, start=False): if path in log_files: log_files[path].done = done return log_files[path] print("Adding file: %s" % path) m = re.match("/var/log/pods/(.*)_(.*)_.*/(.*)/[0-9]+\\.log$", path) if not m: print("Unexpected filename:", path) counter_unexpected_filenames.inc() return namespace_name, pod_name, container_name = m.groups() if args.namespace and args.namespace == namespace_name: return for prefix in args.exclude_pod_prefixes: if pod_name.startswith(prefix): return if args.namespace and namespace_name != args.namespace: return lf = log_files[path] = LogFile(loop, coll, queue, path, namespace_name, pod_name, container_name) lf.done = done lf.start() inotify.add_watch(path, Mask.MODIFY | Mask.CLOSE_WRITE) return lf inotify.add_watch(ROOT, Mask.CREATE | Mask.ONLYDIR) # Register all existing log files for pod_dir in os.listdir("/var/log/pods"): m = re.match("(.*)_(.*)_(.*)$", pod_dir) if not m: print("Unexpected directory", pod_dir) continue namespace_name, pod_name, pod_id = m.groups() for container_name in os.listdir(os.path.join("/var/log/pods", pod_dir)): if not re.match("^(?![0-9]+$)(?!-)[a-zA-Z0-9-]{,63}(?