Improved line buffering
continuous-integration/drone Build is passing
Details
continuous-integration/drone Build is passing
Details
This commit is contained in:
parent
2ccd734ba5
commit
4d90244507
|
@ -237,21 +237,52 @@ async def uploader(coll, queue):
|
||||||
messages = []
|
messages = []
|
||||||
|
|
||||||
|
|
||||||
class LogFile(object):
|
class FileTailer(object):
|
||||||
def __init__(self, loop, coll, queue, path, namespace_name, pod_name, container_name, start=False):
|
def __init__(self, path, offset=0, finished=False):
|
||||||
|
self.head = offset
|
||||||
|
self.tail = offset + 1
|
||||||
|
self.offset = offset
|
||||||
self.path = path
|
self.path = path
|
||||||
self.tail = 0
|
self.buf = b""
|
||||||
|
self.finished = finished
|
||||||
self.more_content = asyncio.Event()
|
self.more_content = asyncio.Event()
|
||||||
self.fh = open(path, "rb")
|
|
||||||
|
async def __aiter__(self):
|
||||||
|
with open(self.path, "rb") as fh:
|
||||||
|
while True:
|
||||||
|
if not self.finished and self.head >= self.tail:
|
||||||
|
await self.more_content.wait()
|
||||||
|
self.more_content.clear()
|
||||||
|
self.tail = fh.seek(0, os.SEEK_END)
|
||||||
|
if self.head >= self.tail:
|
||||||
|
if self.finished:
|
||||||
|
# TODO: if there is still something in buf?
|
||||||
|
break
|
||||||
|
continue
|
||||||
|
fh.seek(self.head)
|
||||||
|
chunk = fh.read(min(self.tail - self.head, 4096))
|
||||||
|
self.buf += chunk
|
||||||
|
self.head += len(chunk)
|
||||||
|
while True:
|
||||||
|
step = self.buf.find(b"\n")
|
||||||
|
if step == -1:
|
||||||
|
break
|
||||||
|
buf = self.buf[:step + 1]
|
||||||
|
self.buf = self.buf[step + 1:]
|
||||||
|
yield self.offset, len(buf), buf[:-1].decode("utf-8")
|
||||||
|
self.offset += step + 1
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
class LogFile(FileTailer):
|
||||||
|
def __init__(self, loop, coll, queue, path, namespace_name, pod_name, container_name, start=False):
|
||||||
|
FileTailer.__init__(self, path)
|
||||||
self.queue = queue
|
self.queue = queue
|
||||||
self.namespace_name = namespace_name
|
self.namespace_name = namespace_name
|
||||||
self.pod_name = pod_name
|
self.pod_name = pod_name
|
||||||
self.container_name = container_name
|
self.container_name = container_name
|
||||||
self.running = True
|
|
||||||
self.coll = coll
|
self.coll = coll
|
||||||
self.poke()
|
|
||||||
self.state = "seeking"
|
self.state = "seeking"
|
||||||
self.done = False
|
|
||||||
self.loop = loop
|
self.loop = loop
|
||||||
if start:
|
if start:
|
||||||
self.start()
|
self.start()
|
||||||
|
@ -269,7 +300,6 @@ class LogFile(object):
|
||||||
async def handler_loop(self):
|
async def handler_loop(self):
|
||||||
message = ""
|
message = ""
|
||||||
record_size = 0
|
record_size = 0
|
||||||
self.head = 0
|
|
||||||
skip_next = False
|
skip_next = False
|
||||||
if not args.dry_run:
|
if not args.dry_run:
|
||||||
then = time()
|
then = time()
|
||||||
|
@ -279,51 +309,34 @@ class LogFile(object):
|
||||||
}, sort=[("log.offset", -1)])
|
}, sort=[("log.offset", -1)])
|
||||||
histogram_database_operation_latency.labels("find-replay-offset").observe(time() - then)
|
histogram_database_operation_latency.labels("find-replay-offset").observe(time() - then)
|
||||||
if last_record:
|
if last_record:
|
||||||
self.head = last_record["log"]["offset"]
|
self.head = self.offset = last_record["log"]["offset"]
|
||||||
counter_skipped_bytes.inc(self.head)
|
counter_skipped_bytes.inc(self.head)
|
||||||
skip_next = True
|
skip_next = True
|
||||||
|
|
||||||
self.state = "replaying"
|
self.state = "replaying"
|
||||||
offset = self.head
|
record_offset = self.offset
|
||||||
while self.running:
|
async for line_offset, line_size, line in self:
|
||||||
while self.head >= self.tail:
|
assert "\n" not in line
|
||||||
self.state = "watching"
|
|
||||||
if self.done:
|
|
||||||
break
|
|
||||||
await self.more_content.wait()
|
|
||||||
self.more_content.clear()
|
|
||||||
self.tail = self.fh.seek(0, os.SEEK_END)
|
|
||||||
|
|
||||||
assert self.head < self.tail
|
|
||||||
self.fh.seek(self.head)
|
|
||||||
buf = self.fh.readline()
|
|
||||||
self.head += len(buf)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
reason = "unicode-encoding"
|
reason = "unicode-encoding"
|
||||||
line = buf.decode("utf-8")
|
|
||||||
if len(line) < 45:
|
if len(line) < 45:
|
||||||
reason = "line-short"
|
reason = "line-short"
|
||||||
raise ValueError()
|
raise ValueError()
|
||||||
if not line[-1] == "\n":
|
if not re.match("^(.+) (stdout|stderr)( (.))? (.*)$", line):
|
||||||
reason = "no-newline %s" % repr((buf))
|
|
||||||
raise ValueError()
|
|
||||||
if not re.match("^(.+) (stdout|stderr)( (.))? (.*)$", line[:-1]):
|
|
||||||
reason = "no-regex-match"
|
reason = "no-regex-match"
|
||||||
raise ValueError()
|
raise ValueError()
|
||||||
reason = "invalid-timestamp"
|
reason = "invalid-timestamp"
|
||||||
event_created = datetime.strptime(line[:23], "%Y-%m-%dT%H:%M:%S.%f")
|
event_created = datetime.strptime(line[:23], "%Y-%m-%dT%H:%M:%S.%f")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print("Failed to parse file %s at offset %d, reason: %s" % (self.path, self.head, reason))
|
print("Failed to parse file %s at offset %d, reason %s: %s" % (self.path, line_offset, reason, repr(line)))
|
||||||
break
|
break
|
||||||
|
|
||||||
histogram_line_size.observe(len(buf))
|
histogram_line_size.observe(line_size)
|
||||||
|
record_size += line_size
|
||||||
record_size += len(buf)
|
|
||||||
|
|
||||||
if record_size < args.max_record_size:
|
if record_size < args.max_record_size:
|
||||||
# TODO: Support Docker runtime on EKS
|
# TODO: Support Docker runtime on EKS
|
||||||
message += line[45:-1]
|
message += line[45:]
|
||||||
|
|
||||||
state = line[43]
|
state = line[43]
|
||||||
if state == "P":
|
if state == "P":
|
||||||
|
@ -368,7 +381,7 @@ class LogFile(object):
|
||||||
o["log"]["file"] = {
|
o["log"]["file"] = {
|
||||||
"path": self.path
|
"path": self.path
|
||||||
}
|
}
|
||||||
o["log"]["offset"] = offset
|
o["log"]["offset"] = record_offset
|
||||||
o["host"] = host_info
|
o["host"] = host_info
|
||||||
o["stream"] = stream
|
o["stream"] = stream
|
||||||
o["event"] = {
|
o["event"] = {
|
||||||
|
@ -393,9 +406,8 @@ class LogFile(object):
|
||||||
await self.queue.put(o)
|
await self.queue.put(o)
|
||||||
gauge_queue_entries.set(self.queue.qsize())
|
gauge_queue_entries.set(self.queue.qsize())
|
||||||
skip_next = False
|
skip_next = False
|
||||||
offset = self.head
|
record_offset = line_offset
|
||||||
self.state = "closing"
|
self.state = "closing"
|
||||||
self.fh.close()
|
|
||||||
log_files.pop(self.path)
|
log_files.pop(self.path)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue