2022-10-11 07:55:22 +00:00
|
|
|
package logmower
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2022-11-04 08:47:45 +00:00
|
|
|
"sync"
|
2022-11-05 23:45:19 +00:00
|
|
|
"time"
|
2022-10-11 07:55:22 +00:00
|
|
|
|
|
|
|
"github.com/jtagcat/util"
|
|
|
|
prom "github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
|
|
"go.mongodb.org/mongo-driver/bson"
|
|
|
|
"go.mongodb.org/mongo-driver/mongo"
|
|
|
|
mongoOpt "go.mongodb.org/mongo-driver/mongo/options"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
2022-11-06 16:41:10 +00:00
|
|
|
promFileInitialSeekSkipped = promauto.NewGaugeVec(prom.GaugeOpts{
|
|
|
|
Namespace: PrometheusPrefix,
|
|
|
|
// Subsystem: "file",
|
|
|
|
Name: "skipped_bytes",
|
|
|
|
Help: "Bytes skipped in file after discovering",
|
|
|
|
}, []string{"filename"})
|
|
|
|
promFileCatchupDone = promauto.NewGaugeVec(prom.GaugeOpts{
|
2022-11-06 15:02:49 +00:00
|
|
|
Namespace: PrometheusPrefix,
|
2022-10-11 07:55:22 +00:00
|
|
|
Subsystem: "file",
|
|
|
|
Name: "catchupped",
|
2022-11-06 16:41:10 +00:00
|
|
|
Help: "(0 or) 1 if initial backlog has been sent; (total <= watcher_file_count)",
|
2022-10-11 07:55:22 +00:00
|
|
|
}, []string{"filename"}) // TODO: rm filename?
|
|
|
|
promFileErr = promauto.NewCounterVec(prom.CounterOpts{
|
2022-11-06 15:02:49 +00:00
|
|
|
Namespace: PrometheusPrefix,
|
2022-10-11 07:55:22 +00:00
|
|
|
Subsystem: "file",
|
|
|
|
Name: "errors_count",
|
2022-11-06 16:41:10 +00:00
|
|
|
Help: "Errors while reading file",
|
|
|
|
}, []string{"filename"})
|
|
|
|
promFileLineSize = promauto.NewHistogramVec(prom.HistogramOpts{
|
|
|
|
Namespace: PrometheusPrefix,
|
|
|
|
// Subsystem: "file",
|
|
|
|
Name: "line_size_bytes",
|
|
|
|
Help: "Log line size in bytes",
|
|
|
|
Buckets: []float64{80, 160, 320, 640, 1280},
|
|
|
|
}, []string{"filename"})
|
2022-10-11 07:55:22 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type (
|
|
|
|
submitter struct {
|
2022-11-05 23:21:30 +00:00
|
|
|
l *zap.Logger
|
2022-10-11 07:55:22 +00:00
|
|
|
|
|
|
|
hostInfo HostInfo
|
|
|
|
db *mongo.Collection
|
|
|
|
|
2022-11-04 08:47:45 +00:00
|
|
|
sync.WaitGroup
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2022-11-05 23:45:19 +00:00
|
|
|
// SendQueueLimit is the buffer capacity of the per-file channel of mLog
// records that shipFile creates and hands to the sender goroutine.
const SendQueueLimit = 1024
|
|
|
|
|
|
|
|
// TODO: caller may call duplicate shipFile of same name on file replace; sends might not work properly
|
2022-11-06 20:02:29 +00:00
|
|
|
func (s *submitter) shipFile(ctx context.Context, name string, recordLimitBytes int) {
|
2022-10-11 07:55:22 +00:00
|
|
|
baseName := filepath.Base(name)
|
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
lineChan := make(chan rawLine)
|
|
|
|
defer close(lineChan)
|
|
|
|
|
2022-11-05 23:45:19 +00:00
|
|
|
sendChan := make(chan mLog, SendQueueLimit)
|
2022-11-06 13:46:07 +00:00
|
|
|
defer close(sendChan)
|
2022-11-05 23:45:19 +00:00
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
go s.parseLines(recordLimitBytes, lineChan, sendChan)
|
|
|
|
|
2022-11-06 13:46:07 +00:00
|
|
|
go s.sender(name, sendChan)
|
2022-11-05 23:45:19 +00:00
|
|
|
|
2022-11-06 16:41:10 +00:00
|
|
|
// TODO: better way to kill or wait for sendQueue before retrying (or duplicates?)
|
2022-11-05 23:21:30 +00:00
|
|
|
wait.ManagedExponentialBackoffWithContext(ctx, defaultBackoff(), func() (done bool, _ error) {
|
2022-11-05 23:45:19 +00:00
|
|
|
//
|
2022-11-06 20:02:29 +00:00
|
|
|
err := s.shipFileRoutine(ctx, name, lineChan)
|
2022-11-05 23:45:19 +00:00
|
|
|
if err == nil {
|
|
|
|
return true, nil
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
2022-11-05 23:45:19 +00:00
|
|
|
|
|
|
|
promFileErr.WithLabelValues(baseName).Add(1)
|
2022-11-06 13:57:18 +00:00
|
|
|
s.l.Error("shipping file", zap.String("filename", name), zap.Error(err))
|
2022-11-05 23:45:19 +00:00
|
|
|
return false, nil // nil since we want to loop and keep retrying indefinitely
|
2022-10-11 07:55:22 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue chan<- rawLine) error {
|
2022-11-06 01:43:18 +00:00
|
|
|
baseName := filepath.Base(name)
|
2022-11-05 23:45:19 +00:00
|
|
|
|
2022-11-06 01:43:18 +00:00
|
|
|
// TODO: better way for respecting ?killing sender for retry
|
2022-11-05 23:45:19 +00:00
|
|
|
for {
|
|
|
|
if len(sendQueue) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
}
|
|
|
|
|
2022-10-11 07:55:22 +00:00
|
|
|
// get files with offset
|
2022-11-05 23:45:19 +00:00
|
|
|
offsetResult, err := mongoWithErr(s.db.FindOne(mongoTimeoutCtx(ctx),
|
2022-11-06 20:02:29 +00:00
|
|
|
bson.D{{Key: mongoKeyHostId, Value: s.hostInfo.id}, {Key: mongoKeyFileBasename, Value: baseName}},
|
2022-11-06 14:33:57 +00:00
|
|
|
&mongoOpt.FindOneOptions{Sort: bson.D{{Key: mongoKeyOffset, Value: -1}}}, // sort descending (get largest)
|
2022-10-11 07:55:22 +00:00
|
|
|
))
|
2022-11-06 01:43:18 +00:00
|
|
|
|
2022-10-11 07:55:22 +00:00
|
|
|
if err != nil && !errors.Is(err, mongo.ErrNoDocuments) {
|
2022-11-06 16:41:10 +00:00
|
|
|
return fmt.Errorf("retrieving offset from database: %w", err)
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
|
2022-11-06 01:43:18 +00:00
|
|
|
var log mLog
|
2022-10-11 07:55:22 +00:00
|
|
|
if err := offsetResult.Decode(&log); err != nil && !errors.Is(err, mongo.ErrNoDocuments) {
|
2022-11-06 16:41:10 +00:00
|
|
|
return fmt.Errorf("decoding offset from database: %w", err)
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
|
2022-11-06 01:43:18 +00:00
|
|
|
fi, err := os.Stat(name)
|
2022-10-11 07:55:22 +00:00
|
|
|
if err != nil {
|
2022-11-06 02:04:32 +00:00
|
|
|
return fmt.Errorf("getting original file size: %w", err)
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
startSize := fi.Size()
|
|
|
|
|
2022-11-06 12:41:09 +00:00
|
|
|
sctx, cancel := context.WithCancel(ctx)
|
|
|
|
defer cancel()
|
|
|
|
|
2022-11-06 16:41:10 +00:00
|
|
|
promFileInitialSeekSkipped.WithLabelValues(baseName).Set(float64(log.Offset))
|
|
|
|
|
2022-11-06 12:41:09 +00:00
|
|
|
lineChan, errChan, err := util.TailFile(sctx, name, log.Offset, io.SeekStart)
|
2022-10-11 07:55:22 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("tailing file: %w", err)
|
|
|
|
}
|
2022-11-05 23:45:19 +00:00
|
|
|
|
2022-11-06 14:09:28 +00:00
|
|
|
var catchUpped bool // cache
|
2022-11-06 16:41:10 +00:00
|
|
|
promFileCatchupDone.WithLabelValues(baseName).Set(0)
|
|
|
|
|
|
|
|
// TODO: partial line combining
|
|
|
|
// TODO: promRecordDroppedTooLarge
|
2022-11-06 14:11:23 +00:00
|
|
|
|
2022-10-11 07:55:22 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case err := <-errChan:
|
|
|
|
return fmt.Errorf("tailing file: %w", err)
|
2022-11-05 23:45:19 +00:00
|
|
|
|
2022-11-06 13:46:07 +00:00
|
|
|
case line, ok := <-lineChan:
|
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
promFileLineSize.WithLabelValues(baseName).Observe(float64(len(line.Bytes)))
|
2022-11-06 16:41:10 +00:00
|
|
|
|
2022-11-06 14:09:28 +00:00
|
|
|
if !catchUpped {
|
2022-11-06 14:35:49 +00:00
|
|
|
catchUpped = line.EndOffset >= startSize
|
2022-11-06 14:09:28 +00:00
|
|
|
|
|
|
|
if catchUpped {
|
2022-11-06 16:41:10 +00:00
|
|
|
promFileCatchupDone.WithLabelValues(baseName).Set(1)
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
if len(line.Bytes) == 0 {
|
2022-11-06 13:57:18 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
sendQueue <- rawLine{
|
|
|
|
recordMetadata: recordMetadata{
|
|
|
|
HostInfo: s.hostInfo,
|
|
|
|
File: baseName,
|
2022-11-06 01:43:18 +00:00
|
|
|
|
2022-11-06 20:02:29 +00:00
|
|
|
Offset: line.EndOffset,
|
|
|
|
},
|
|
|
|
line: line.Bytes,
|
2022-10-11 07:55:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-05 23:45:19 +00:00
|
|
|
// mongoWithErr returns mongoWrap unchanged together with the result of its
// Err method, so that a call returning only a result object (for example a
// FindOne result) can be used in the usual `value, err :=` form.
func mongoWithErr[T interface{ Err() error }](mongoWrap T) (T, error) {
	err := mongoWrap.Err()

	return mongoWrap, err
}
|
|
|
|
|
|
|
|
// func JitterUntilCancelWithContext(pctx context.Context, f func(context.Context, context.CancelFunc), period time.Duration, jitterFactor float64, sliding bool) {
|
|
|
|
// ctx, cancel := context.WithCancel(pctx)
|
|
|
|
// wait.JitterUntil(func() { f(ctx, cancel) }, period, jitterFactor, sliding, ctx.Done())
|
|
|
|
// }
|