logmower-shipper/cmd/line.go

package logmower

import (
	"bytes"
	"fmt"
	"sync"
	"time"

	prom "github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"go.uber.org/zap"
)

var (
	// promRecordPrefixParsingErr counts container log lines whose prefix
	// failed to parse. It is labelled with the name of the file the line
	// came from.
	promRecordPrefixParsingErr = promauto.NewCounterVec(prom.CounterOpts{
		Namespace: PrometheusPrefix,
		Subsystem: "record",
		Name:      "parsing_errors",
		Help:      "Errors while parsing log line prefixes",
	}, []string{"filename"})
	// promRecordDroppedTooLarge counts records that were discarded because
	// their accumulated content exceeded the buffer limit. It is labelled
	// with the name of the file the offending line came from.
	//
	// NOTE(review): unlike the counter above, this one sets no Subsystem and
	// is named "dropped_lines"; the commented-out alternatives below suggest
	// record/"dropped" was considered. Renaming would change the exported
	// metric name, so confirm with dashboard/alert owners before changing it.
	promRecordDroppedTooLarge = promauto.NewCounterVec(prom.CounterOpts{
		Namespace: PrometheusPrefix,
		// Subsystem: "record",
		Name: "dropped_lines", // "dropped",
		Help: "Records dropped due to being too large",
	}, []string{"filename"})
)

type (
	// rawLine is a log line that has not been parsed yet, paired with the
	// recordMetadata (including the source File) that describes its origin.
	rawLine struct {
		recordMetadata
		// line holds the unparsed bytes of the line, prefix included.
		line []byte
	}

	// singleLine is the result of parsing the prefix of one container log
	// line. The embedded mLog carries the stream, timestamp and metadata.
	singleLine struct {
		mLog
		// line is the content that follows the prefix; when the prefix has
		// too few fields it is the whole input line instead.
		line []byte
		// isPartial is true when the line was tagged "P", meaning the record
		// continues on a following line; "F" marks the final line.
		isPartial bool // P or F
	}
)

// parseSingleContainerLine decodes one container log line of the form
//
//	<RFC3339Nano timestamp> <stream> <P|F> <content>
//
// and returns the extracted fields. The stream is treated as stderr only when
// it is exactly "stderr"; anything else counts as stdout.
//
// On error the returned singleLine is still partially populated so callers
// can forward it on a best-effort basis:
//   - fewer than four space-separated fields: line holds the whole input;
//   - tag other than "P"/"F": line and StdErr are set, the time is not parsed;
//   - unparsable timestamp: everything but ContainerTime is set.
func parseSingleContainerLine(line []byte) (u singleLine, err error) {
	fields := bytes.SplitN(line, []byte(" "), 4)
	if len(fields) != 4 {
		u.line = line
		return u, fmt.Errorf("expected at least 3 spaces in container log line, got %d", len(fields)-1)
	}

	timestamp, stream, tag, content := fields[0], fields[1], fields[2], fields[3]

	u.line = content
	u.StdErr = string(stream) == "stderr"

	switch string(tag) {
	case "F":
		// Final line of a record; isPartial keeps its zero value.
	case "P":
		u.isPartial = true
	default:
		return u, fmt.Errorf("partial indicator must be 'P' or 'F', not %q", tag)
	}

	u.ContainerTime, err = time.Parse(time.RFC3339Nano, string(timestamp))
	return u, err
}

// parseContainerLogLines parses the prefix of every raw line received on
// unparsed and forwards the result on parsed. Once unparsed is closed and
// drained, parsed is closed as well.
//
// A line whose prefix fails to parse is counted and logged, but it is still
// forwarded with whatever fields parseSingleContainerLine managed to fill in.
func (s *submitter) parseContainerLogLines(unparsed <-chan rawLine, parsed chan<- singleLine) {
	for raw := range unparsed {
		entry, parseErr := parseSingleContainerLine(raw.line)
		if parseErr != nil {
			promRecordPrefixParsingErr.WithLabelValues(raw.File).Add(1)
			s.l.Error("parsing container log line", zap.Error(parseErr), zap.String("file", raw.File))
		}

		// Carry the origin metadata over regardless of the parse outcome.
		entry.mLog.recordMetadata = raw.recordMetadata
		parsed <- entry
	}

	close(parsed)
}

// parseLines turns raw container log lines into complete records.
//
// It parses each line's prefix, routes the line to a per-stream worker
// (stdout or stderr) that joins partial lines, and lets both workers publish
// on parsed. When unparsed is closed, the workers are drained and parsed is
// closed. bufferLimitBytes is passed to each worker as the record size cap.
//
// Assumes all lines are from the same file.
func (s *submitter) parseLines(bufferLimitBytes int, unparsed <-chan rawLine, parsed chan<- mLog) {
	prefixed := make(chan singleLine)
	go s.parseContainerLogLines(unparsed, prefixed)

	stdOut := make(chan singleLine)
	stdErr := make(chan singleLine)

	// One reassembly worker per stream, so partial lines of stdout and stderr
	// are never mixed together.
	var workers sync.WaitGroup
	for _, stream := range []chan singleLine{stdOut, stdErr} {
		workers.Add(1)
		go func(in <-chan singleLine) {
			defer workers.Done()
			s.parseStdChannel(bufferLimitBytes, in, parsed)
		}(stream)
	}

	// Split stdout and stderr.
	for entry := range prefixed {
		target := stdOut
		if entry.StdErr {
			target = stdErr
		}
		target <- entry
	}

	// Input is exhausted: stop the workers and wait for them before closing
	// the output they write to.
	close(stdOut)
	close(stdErr)
	workers.Wait()
	close(parsed)
}

// parseStdChannel reassembles the lines of one stream into records and sends
// each completed record on parsed.
//
// A record is a run of zero or more partial lines ended by a full line. The
// emitted mLog uses the timestamp of the record's first line and the metadata
// and stream flag of its last line.
//
// If the accumulated content grows beyond bufferLimitBytes, the whole record
// is dropped: the drop is counted and logged once, and every remaining line
// of that record, up to and including its terminating full line, is skipped.
// This keeps the tail of an oversized record from being emitted as a record
// of its own.
//
// The function returns when lines is closed. Partial lines still buffered at
// that point have no terminating full line and are discarded. parsed is not
// closed here; that is left to the caller.
func (s *submitter) parseStdChannel(bufferLimitBytes int, lines <-chan singleLine, parsed chan<- mLog) {
	var (
		firstTime time.Time
		buffer    []byte
		// skipping is true while consuming the remainder of a record that
		// has already been dropped for being too large.
		skipping bool
	)

	for line := range lines {
		if skipping {
			// Still inside the dropped record; a full line ends it.
			skipping = line.isPartial
			continue
		}

		if len(buffer) == 0 {
			firstTime = line.ContainerTime
		}

		buffer = append(buffer, line.line...)

		if len(buffer) > bufferLimitBytes {
			buffer = nil
			// If the offending line was partial, more pieces of this record
			// follow and must be thrown away too.
			skipping = line.isPartial
			promRecordDroppedTooLarge.WithLabelValues(line.File).Add(1)
			s.l.Warn("dropped record: too large",
				zap.Int("cap_bytes", bufferLimitBytes),
				zap.String("file", line.File))
			continue
		}

		if line.isPartial {
			continue
		}

		// Full line: the record is complete.
		parsed <- mLog{
			recordMetadata: line.mLog.recordMetadata,
			StdErr:         line.StdErr,

			ContainerTime: firstTime,
			Content:       parseRecord(buffer),
		}
		buffer = nil
	}
}

// parseRecord converts the raw bytes of a reassembled record into the value
// stored as the record's content. For now the content is simply a string copy
// of the bytes.
func parseRecord(buffer []byte) any {
	// TODO: json parser
	content := string(buffer)
	return content
}
parse partial lines 2022-11-06 20:02:29 +00:00			`package logmower`

			`import (`
			`"bytes"`
			`"fmt"`
			`"sync"`
			`"time"`

			`prom "github.com/prometheus/client_golang/prometheus"`
			`"github.com/prometheus/client_golang/prometheus/promauto"`
			`"go.uber.org/zap"`
			`)`

			`var (`
			`promRecordPrefixParsingErr = promauto.NewCounterVec(prom.CounterOpts{`
			`Namespace: PrometheusPrefix,`
			`Subsystem: "record",`
			`Name: "parsing_errors",`
			`Help: "Errors while parsing log line prefixes",`
			`}, []string{"filename"})`
			`promRecordDroppedTooLarge = promauto.NewCounterVec(prom.CounterOpts{`
			`Namespace: PrometheusPrefix,`
			`// Subsystem: "record",`
			`Name: "dropped_lines", // "dropped",`
			`Help: "Records dropped due to being too large",`
			`}, []string{"filename"})`
			`)`

			`type (`
			`rawLine struct {`
			`recordMetadata`
			`line []byte`
			`}`

			`singleLine struct {`
			`mLog`
			`line []byte`
			`isPartial bool // P or F`
			`}`
			`)`

			`func parseSingleContainerLine(line []byte) (u singleLine, err error) {`
			`split := bytes.SplitN(line, []byte(" "), 4)`
			`if len(split) != 4 {`
			`u.line = line`
			`return u, fmt.Errorf("expected at least 3 spaces in container log line, got %d", len(split)-1)`
			`}`

			`u.line = split[3]`

			`u.StdErr = string(split[1]) == "stderr" // or stdout`
			`switch string(split[2]) {`
			`case "P":`
			`u.isPartial = true`
			`case "F":`
			`default:`
			`return u, fmt.Errorf("partial indicator must be 'P' or 'F', not %q", split[2])`
			`}`

			`u.ContainerTime, err = time.Parse(time.RFC3339Nano, string(split[0]))`

			`return u, err`
			`}`

tidy logging 2022-11-06 20:24:33 +00:00			`func (s *submitter) parseContainerLogLines(unparsed <-chan rawLine, parsed chan<- singleLine) {`
parse partial lines 2022-11-06 20:02:29 +00:00			`for {`
			`raw, ok := <-unparsed`
			`if !ok {`
			`close(parsed)`
			`return`
			`}`

			`line, err := parseSingleContainerLine(raw.line)`
			`if err != nil {`
			`promRecordPrefixParsingErr.WithLabelValues(raw.File).Add(1)`
tidy logging 2022-11-06 20:24:33 +00:00			`s.l.Error("parsing container log line", zap.Error(err), zap.String("file", raw.File))`
parse partial lines 2022-11-06 20:02:29 +00:00			`}`

			`line.mLog.recordMetadata = raw.recordMetadata`
			`parsed <- line`
			`}`
			`}`

			`// assumes all lines are from same file`
			`func (s *submitter) parseLines(bufferLimitBytes int, unparsed <-chan rawLine, parsed chan<- mLog) {`
			`lines := make(chan singleLine)`
tidy logging 2022-11-06 20:24:33 +00:00			`go s.parseContainerLogLines(unparsed, lines)`
parse partial lines 2022-11-06 20:02:29 +00:00
			`var wg sync.WaitGroup`
			`wg.Add(2)`

			`stdOut, stdErr := make(chan singleLine), make(chan singleLine)`
			`go func() {`
			`s.parseStdChannel(bufferLimitBytes, stdOut, parsed)`
			`wg.Done()`
			`}()`
			`go func() {`
			`s.parseStdChannel(bufferLimitBytes, stdErr, parsed)`
			`wg.Done()`
			`}()`

			`// split stdout and stderr`
			`for {`
			`line, ok := <-lines`
			`if !ok {`
			`close(stdOut)`
			`close(stdErr)`
			`wg.Wait()`
			`close(parsed)`
			`return`
			`}`

			`if line.StdErr {`
			`stdErr <- line`
			`} else {`
			`stdOut <- line`
			`}`
			`}`
			`}`

			`// partial is ended with full`

			`func (s *submitter) parseStdChannel(bufferLimitBytes int, lines <-chan singleLine, parsed chan<- mLog) {`
			`var firstTime time.Time`
			`var buffer []byte`

			`flush := func(last *mLog) {`
			`parsed <- mLog{`
			`recordMetadata: last.recordMetadata,`
			`StdErr: last.StdErr,`

			`ContainerTime: firstTime,`
			`Content: parseRecord(buffer),`
			`}`
			`buffer = nil`
			`}`

			`for {`
			`line, ok := <-lines`
			`if !ok {`
			`// discard any partial lines without end delimiter (full line)`
			`return`
			`}`

			`if len(buffer) == 0 {`
			`firstTime = line.ContainerTime`
			`}`

			`buffer = append(buffer, line.line...)`

			`if len(buffer) > bufferLimitBytes {`
			`buffer = nil`
			`promRecordDroppedTooLarge.WithLabelValues(line.File).Add(1)`
tidy logging 2022-11-06 20:24:33 +00:00			`s.l.Warn("dropped record: too large", zap.Int("cap_bytes", bufferLimitBytes))`
parse partial lines 2022-11-06 20:02:29 +00:00			`continue`
			`}`

			`if !line.isPartial {`
			`flush(&line.mLog)`
			`}`
			`}`
			`// TODO: flush last`
			`// use time of first`
			`// metadata of last`
			`// //`
			`// for {`
			`// }`

			`// promRecordDroppedTooLarge`
			`}`

			`func parseRecord(buffer []byte) any {`
			`// TODO: json parser`
			`return string(buffer)`
			`}`