prom: homogenize metrics with python prototype
parent 28a518be7f
commit ce066129b3

cmd/mongo.go | 35
@@ -12,45 +12,48 @@ import (
 )
 
 func mongoMonitoredClientOptions(l *zap.Logger) *mongoOpt.ClientOptions {
-	promMongoHeartbeat := promauto.NewHistogramVec(prom.HistogramOpts{
-		Subsystem: "mongo",
+	promDbHeartbeat := promauto.NewHistogramVec(prom.HistogramOpts{
+		Namespace: PrometheusPrefix,
+		Subsystem: "database",
 		Name: "heartbeat_time",
 		Help: "Time in ns for succeeded heartbeat, or 0 on failure",
 		Buckets: []float64{1},
 	}, []string{"connection_id"})
 
-	promMongoCmd := promauto.NewHistogramVec(prom.HistogramOpts{
-		Subsystem: "mongo",
-		Name: "command_time",
+	promDbCmd := promauto.NewHistogramVec(prom.HistogramOpts{
+		Namespace: PrometheusPrefix,
+		Subsystem: "database",
+		Name: "operation_latency", // "command_time",
 		Help: "Time in ns of commands",
-		Buckets: prom.DefBuckets,
+		Buckets: []float64{0.1, 0.2, 0.5, 1, 5, 10, 50},
 	}, []string{"connection_id", "command_name"})
 
-	promMongoCmdErr := promauto.NewCounterVec(prom.CounterOpts{
-		Subsystem: "mongo",
+	promDbCmdErr := promauto.NewCounterVec(prom.CounterOpts{
+		Namespace: PrometheusPrefix,
+		Subsystem: "database",
 		Name: "errors",
-		Help: "Count of failed commands",
+		Help: "Failed commands (also reflected elsewhere)",
 	}, []string{"connection_id", "command_name"})
 
 	return mongoOpt.Client().
 		SetServerMonitor(&mongoEvent.ServerMonitor{
 			ServerHeartbeatSucceeded: func(ev *mongoEvent.ServerHeartbeatSucceededEvent) {
-				promMongoHeartbeat.WithLabelValues(ev.ConnectionID).Observe(float64(ev.DurationNanos))
+				promDbHeartbeat.WithLabelValues(ev.ConnectionID).Observe(float64(ev.DurationNanos))
 			},
 			ServerHeartbeatFailed: func(ev *mongoEvent.ServerHeartbeatFailedEvent) {
-				promMongoHeartbeat.WithLabelValues(ev.ConnectionID).Observe(0)
-				l.Error("mongo heartbeat", zap.Error(ev.Failure), zap.String("connection_id", ev.ConnectionID))
+				promDbHeartbeat.WithLabelValues(ev.ConnectionID).Observe(0)
+				l.Error("database heartbeat", zap.Error(ev.Failure), zap.String("connection_id", ev.ConnectionID))
 			},
 		}).
 		SetMonitor(&mongoEvent.CommandMonitor{
 			Succeeded: func(_ context.Context, ev *mongoEvent.CommandSucceededEvent) {
-				promMongoCmd.WithLabelValues(ev.ConnectionID, ev.CommandName).Observe(float64(ev.DurationNanos))
+				promDbCmd.WithLabelValues(ev.ConnectionID, ev.CommandName).Observe(float64(ev.DurationNanos))
 			},
 			Failed: func(_ context.Context, ev *mongoEvent.CommandFailedEvent) {
-				promMongoCmd.WithLabelValues(ev.ConnectionID, ev.CommandName).Observe(float64(ev.DurationNanos))
+				promDbCmd.WithLabelValues(ev.ConnectionID, ev.CommandName).Observe(float64(ev.DurationNanos))
 
-				promMongoCmdErr.WithLabelValues(ev.ConnectionID, ev.CommandName).Add(1)
-				l.Error("mongo command", zap.Error(fmt.Errorf("%s", ev.Failure)), zap.String("connection_id", ev.ConnectionID), zap.String("command_name", ev.CommandName)) // TODO: https://github.com/mongodb/mongo-go-driver/pull/1105
+				promDbCmdErr.WithLabelValues(ev.ConnectionID, ev.CommandName).Add(1)
+				l.Error("database command", zap.Error(fmt.Errorf("%s", ev.Failure)), zap.String("connection_id", ev.ConnectionID), zap.String("command_name", ev.CommandName)) // TODO: https://github.com/mongodb/mongo-go-driver/pull/1105
 			},
 		})
 }
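
Note on the resulting metric names: client_golang joins Namespace, Subsystem and Name with underscores, so with PrometheusPrefix = "logmower" the collectors above are exported as logmower_database_heartbeat_time, logmower_database_operation_latency and logmower_database_errors. A minimal sketch of that composition, using the same prom alias as the code above:

    package main

    import (
        "fmt"

        prom "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // BuildFQName is the same joining rule the Opts structs apply internally.
        fmt.Println(prom.BuildFQName("logmower", "database", "heartbeat_time"))
        // prints: logmower_database_heartbeat_time
    }
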
cmd/mower.go | 51
@@ -27,8 +27,8 @@ import (
 
 const (
 	MachineId = "/etc/machine-id"
-	MongoTimeout = 10 * time.Second
-	PrometheusPrefix = "logmower-shipper"
+	DatabaseCommandTimeout = 10 * time.Second
+	PrometheusPrefix = "logmower"
 )
 
 // wrapper to force copying before use
@@ -42,7 +42,7 @@ func defaultBackoff() wait.Backoff {
 }
 
 func mongoTimeoutCtx(ctx context.Context) context.Context {
-	ctx, _ = context.WithTimeout(ctx, MongoTimeout) //nolint:lostcancel (cancelled by mongo, should be bug on them //TODO)
+	ctx, _ = context.WithTimeout(ctx, DatabaseCommandTimeout) //nolint:lostcancel (cancelled by mongo, should be bug on them //TODO)
 	return ctx
 }
 
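
The //nolint:lostcancel annotation above suppresses go vet's lostcancel check, which flags discarded context.CancelFunc values. The conventional leak-free shape hands the cancel back to the caller; a sketch of that variant (hypothetical, not part of this commit, and it would oblige every caller to defer the cancel):

    // mongoTimeoutCtxCancelable is a hypothetical replacement for mongoTimeoutCtx.
    func mongoTimeoutCtxCancelable(ctx context.Context) (context.Context, context.CancelFunc) {
        return context.WithTimeout(ctx, DatabaseCommandTimeout)
    }

    // usage: ctx, cancel := mongoTimeoutCtxCancelable(ctx); defer cancel()
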
@@ -59,7 +59,7 @@ var App = &cli.App{
 		&cli.StringFlag{Name: "log-directory", Usage: "Directory to watch for logs", Value: "/var/log/containers"},
 		&cli.IntFlag{Name: "max-record-size", Value: 128 * 1024, Usage: "Maximum record size in bytes"}, // TODO:
 		&cli.BoolFlag{Name: "normalize-log-level", Usage: "Normalize log.level values to Syslog defined keywords"}, // TODO:
-		&cli.BoolFlag{Name: "delete-after-read", Usage: "Delete log file when it is synced to mongo, and no new lines to read", Value: false},
+		&cli.BoolFlag{Name: "delete-after-read", Usage: "Delete log file when it is synced to database, and no new lines to read", Value: false},
 		// &cli.BoolFlag{Name: "parse-json"}, //TODO:
 		&cli.StringFlag{Category: "k8s metadata", Name: "pod-namespace", EnvVars: []string{"KUBE_POD_NAMESPACE"}}, // TODO:
 		&cli.StringFlag{Category: "k8s metadata", Name: "node-name", EnvVars: []string{"KUBE_NODE_NAME"}, Required: true},
@@ -85,18 +85,30 @@ var App = &cli.App{
 			Help: "1 if initialized, and directory watcher has been engaged successfully",
 		})
 
-		promErrWatching = promauto.NewCounter(prom.CounterOpts{
+		promWatcherErr = promauto.NewCounter(prom.CounterOpts{
 			Namespace: PrometheusPrefix,
 			Subsystem: "watcher",
 			Name: "errors",
 			Help: "Error in logmower watching log files",
 		})
-		promFilesRead = promauto.NewCounter(prom.CounterOpts{
+		promWatcherFilesStarted = promauto.NewCounter(prom.CounterOpts{
 			Namespace: PrometheusPrefix,
-			Subsystem: "watcher",
-			Name: "seen_files",
+			// Subsystem: "watcher",
+			Name: "log_file", // "discovered_logfiles",
 			Help: "Number of tracked log files",
 		})
+		promWatcherFilesSkipped = promauto.NewCounter(prom.CounterOpts{
+			Namespace: PrometheusPrefix,
+			// Subsystem: "watcher",
+			Name: "invalid_filename", // "skipped_files",
+			Help: "Number of files in log directory skipped due to unexpected filename",
+		})
+		promWatcherEvents = promauto.NewCounter(prom.CounterOpts{
+			Namespace: PrometheusPrefix,
+			// Subsystem: "watcher",
+			Name: "inotify_event", // "events",
+			Help: "Number of events while watchng (includes initial create events for existing file discovery)",
+		})
 	)
 	go func() {
 		l.Info("/metrics starting", zap.Int("port", 2112))
@@ -113,7 +125,7 @@ var App = &cli.App{
 
 	dbClient, err := mongo.Connect(mongoTimeoutCtx(ctx.Context))
 	if err != nil {
-		l.Fatal("connecting to mongo", zap.String("uri", dbOpt.GetURI()), zap.Error(err))
+		l.Fatal("connecting to database", zap.String("uri", dbOpt.GetURI()), zap.Error(err))
 	}
 
 	uriParsed, err := url.ParseRequestURI(ctx.String("mongo-uri"))
@@ -157,12 +169,20 @@ var App = &cli.App{
 				if !ok {
 					return
 				}
-				// TODO: #1: || if not in filterset
+				promWatcherEvents.Add(1)
+
 				if event.Op != fsnotify.Create {
 					continue
 				}
 
-				promFilesRead.Add(1)
+				// TODO: #1: || if not in filterset
+				_, ok = parseLogName(event.Name)
+				if !ok {
+					promWatcherFilesSkipped.Add(1)
+					continue
+				}
+
+				promWatcherFilesStarted.Add(1)
 				l.Debug("digesting new file", zap.String("name", event.Name))
 
 				wg.Add(1)
@@ -175,7 +195,7 @@ var App = &cli.App{
 				if !ok {
 					return
 				}
-				promErrWatching.Add(1)
+				promWatcherErr.Add(1)
 				l.Error("while watching log dir events", zap.Error(err))
 			}
 		}
@@ -184,13 +204,13 @@ var App = &cli.App{
 	// simulate create events to pick up files already created
 	err = simulateInitialCreate(logDir, watcher.Events)
 	if err != nil {
-		promErrWatching.Add(1)
+		promWatcherErr.Add(1)
 		l.Fatal("listing initial log directory", zap.String("name", logDir), zap.Error(err))
 	}
 
 	err = watcher.Add(logDir)
 	if err != nil {
-		promErrWatching.Add(1)
+		promWatcherErr.Add(1)
 		l.Fatal("watching log directory", zap.String("name", logDir), zap.Error(err))
 	}
 
@@ -269,6 +289,9 @@ func parseLogName(name string) (m logMeta, ok bool) {
 	}
 
 	m.containerId = strings.TrimSuffix(name, ".log")
+	if !strings.HasSuffix(name, ".log") {
+		return
+	}
 
 	return m, true
 }
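
The new guard in parseLogName rejects names without a ".log" suffix (ok stays false). Assigning containerId before the check is harmless, since strings.TrimSuffix returns its input unchanged when the suffix is absent, but guard-first ordering says the same thing more directly. A sketch (hypothetical helper, not part of this commit):

    // containerIdFromLogName is a hypothetical guard-first equivalent.
    func containerIdFromLogName(name string) (string, bool) {
        if !strings.HasSuffix(name, ".log") {
            return "", false
        }
        return strings.TrimSuffix(name, ".log"), true
    }
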
@@ -12,70 +12,44 @@ import (
 )
 
 var (
-	promShipperMongoSent = promauto.NewCounterVec(prom.CounterOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "sent",
-		Help: "Log lines successfully committed to mongo",
-	}, []string{"filename"})
-	promShipperMongoSentError = promauto.NewCounterVec(prom.CounterOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "mongo_errors",
-		Help: "Errors while submitting to mongo", // TODO:
-	}, []string{"filename"})
-	promLineParsingErr = promauto.NewCounterVec(prom.CounterOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "lines_parsing_errors",
-		Help: "Errors while parsing log line suffixes",
-	}, []string{"filename"})
 	promShipperQueued = promauto.NewGaugeVec(prom.GaugeOpts{
 		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "queued",
-		Help: "Log lines in queue to be batched and sent to mongo",
+		// Subsystem: "shipper",
+		Name: "shipper_record", // "queued",
+		Help: "Log records in queue to be batched and sent to database",
+	}, []string{"filename"})
+	promShipperDbSent = promauto.NewCounterVec(prom.CounterOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "shipper",
+		Name: "record", // "sent",
+		Help: "Log records successfully committed to database",
+	}, []string{"filename"})
+	promShipperBatchSizeResult = promauto.NewHistogram(prom.HistogramOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "shipper",
+		Name: "bulk_submission_message", // "items_in_batch"
+		Help: "Batch size for database submissions",
+		Buckets: []float64{1, 5, 10, 50, 100, 500, 1000, 5000, 10000},
+	})
+	promShipperMongoSentError = promauto.NewCounterVec(prom.CounterOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "shipper",
+		Name: "insertion_error", // "errors",
+		Help: "Errors while submitting to database", // TODO:
 	}, []string{"filename"})
 	promShipperSynced = promauto.NewGaugeVec(prom.GaugeOpts{
 		Namespace: PrometheusPrefix,
 		Subsystem: "shipper",
 		Name: "batches_synced",
-		Help: "All batches available have been sent to mongo",
+		Help: "All batches available have been committed database (0 or 1)",
 	}, []string{"filename"})
 )
 
 const (
-	MaxBatchItems = 100
-	MaxBatchTime = time.Second
+	MaxBatchItems = 10000
+	MaxBatchTime = 5 * time.Second
 )
 
-func init() {
-	promauto.NewGaugeFunc(prom.GaugeOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "queue_size",
-		Help: "Submit queue size cap",
-	}, func() float64 {
-		return float64(SendQueueLimit)
-	})
-	promauto.NewGaugeFunc(prom.GaugeOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "batch_size",
-		Help: "batching size cap",
-	}, func() float64 {
-		return float64(MaxBatchItems)
-	})
-	promauto.NewGaugeFunc(prom.GaugeOpts{
-		Namespace: PrometheusPrefix,
-		Subsystem: "shipper",
-		Name: "batch_time",
-		Help: "batching delay cap",
-	}, func() float64 {
-		return float64(MaxBatchTime)
-	})
-}
-
 func (s *submitter) sender(name string, sendQueue <-chan mLog) {
 	baseName := filepath.Base(name)
 
@@ -123,11 +97,11 @@ func (s *submitter) sender(name string, sendQueue <-chan mLog) {
 		}
 
 		result, err := s.db.InsertMany(mongoTimeoutCtx(context.Background()), batchBson, nil)
-		promShipperMongoSent.WithLabelValues(baseName).Add(float64(
+		promShipperDbSent.WithLabelValues(baseName).Add(float64(
 			len(result.InsertedIDs)))
 
 		if err != nil {
-			s.l.Error("mongo send returned error; TODO: add some selective retry here or something", zap.Error(err)) // TODO:
+			s.l.Error("submission to database", zap.Error(err)) // TODO: add some selective retry here or something
 		}
 	}
 }
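
The fixed GaugeFunc caps from init() (queue_size, batch_size, batch_time) are dropped in favor of promShipperBatchSizeResult, which records the actual size of each bulk insert. The observation site is not shown in this diff; presumably it sits next to the InsertMany call, along these lines (hypothetical):

    // After assembling batchBson for one bulk submission (assumed call site):
    promShipperBatchSizeResult.Observe(float64(len(batchBson)))
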
@@ -22,17 +22,42 @@ import (
 )
 
 var (
-	promCatchupDone = promauto.NewGaugeVec(prom.GaugeOpts{
+	promFileInitialSeekSkipped = promauto.NewGaugeVec(prom.GaugeOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "file",
+		Name: "skipped_bytes",
+		Help: "Bytes skipped in file after discovering",
+	}, []string{"filename"})
+	promFileCatchupDone = promauto.NewGaugeVec(prom.GaugeOpts{
 		Namespace: PrometheusPrefix,
 		Subsystem: "file",
 		Name: "catchupped",
-		Help: "Files where initial backlog has been sent; (total <= watcher_file_count)",
+		Help: "(0 or) 1 if initial backlog has been sent; (total <= watcher_file_count)",
 	}, []string{"filename"}) // TODO: rm filename?
 	promFileErr = promauto.NewCounterVec(prom.CounterOpts{
 		Namespace: PrometheusPrefix,
 		Subsystem: "file",
 		Name: "errors_count",
-		Help: "Error count for reading files",
+		Help: "Errors while reading file",
+	}, []string{"filename"})
+	promFileLineSize = promauto.NewHistogramVec(prom.HistogramOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "file",
+		Name: "line_size_bytes",
+		Help: "Log line size in bytes",
+		Buckets: []float64{80, 160, 320, 640, 1280},
+	}, []string{"filename"})
+	promRecordPrefixParsingErr = promauto.NewCounterVec(prom.CounterOpts{
+		Namespace: PrometheusPrefix,
+		Subsystem: "record",
+		Name: "parsing_errors",
+		Help: "Errors while parsing log line prefixes",
+	}, []string{"filename"})
+	promRecordDroppedTooLarge = promauto.NewCounterVec(prom.CounterOpts{
+		Namespace: PrometheusPrefix,
+		// Subsystem: "record",
+		Name: "dropped_lines", // "dropped",
+		Help: "Records dropped due to being too large",
 	}, []string{"filename"})
 )
 
@@ -58,7 +83,7 @@ func (s *submitter) shipFile(ctx context.Context, name string, deleteAfterRead b
 
 	go s.sender(name, sendChan)
 
-	// TODO: better way to kill or wait for mongo sendQueue before retrying (or duplicates?)
+	// TODO: better way to kill or wait for sendQueue before retrying (or duplicates?)
 	wait.ManagedExponentialBackoffWithContext(ctx, defaultBackoff(), func() (done bool, _ error) {
 		//
 		err := s.shipFileRoutine(ctx, name, sendChan)
@@ -90,12 +115,12 @@ func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue
 	))
 
 	if err != nil && !errors.Is(err, mongo.ErrNoDocuments) {
-		return fmt.Errorf("retrieving mongo offset: %w", err)
+		return fmt.Errorf("retrieving offset from database: %w", err)
 	}
 
 	var log mLog
 	if err := offsetResult.Decode(&log); err != nil && !errors.Is(err, mongo.ErrNoDocuments) {
-		return fmt.Errorf("decoding mongo offset: %w", err)
+		return fmt.Errorf("decoding offset from database: %w", err)
 	}
 
 	fi, err := os.Stat(name)
@@ -107,13 +132,18 @@ func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue
 	sctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
+	promFileInitialSeekSkipped.WithLabelValues(baseName).Set(float64(log.Offset))
+
 	lineChan, errChan, err := util.TailFile(sctx, name, log.Offset, io.SeekStart)
 	if err != nil {
 		return fmt.Errorf("tailing file: %w", err)
 	}
 
 	var catchUpped bool // cache
-	promCatchupDone.WithLabelValues(baseName).Set(0)
+	promFileCatchupDone.WithLabelValues(baseName).Set(0)
 
+	// TODO: partial line combining
+	// TODO: promRecordDroppedTooLarge
+
 	for {
 		select {
@@ -125,11 +155,13 @@ func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue
 			return nil
 			}
 
+			promFileLineSize.WithLabelValues(baseName).Observe(float64(len(line.String)))
+
 			if !catchUpped {
 				catchUpped = line.EndOffset >= startSize
 
 				if catchUpped {
-					promCatchupDone.WithLabelValues(baseName).Set(1)
+					promFileCatchupDone.WithLabelValues(baseName).Set(1)
 				}
 			}
 
@@ -143,7 +175,7 @@ func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue
 			split := strings.SplitN(line.String, " ", 4)
 			if len(split) != 4 {
 				log = line.String
-				promLineParsingErr.WithLabelValues(baseName).Add(1)
+				promRecordPrefixParsingErr.WithLabelValues(baseName).Add(1)
 				s.l.Error("parsing line", zap.Error(fmt.Errorf("expected at least 3 spaces in container log")), zap.Int("got", len(split)-1), zap.String("file", name))
 
 			} else {
@@ -151,7 +183,7 @@ func (s *submitter) shipFileRoutine(ctx context.Context, name string, sendQueue
 
 			collectTime, err = time.Parse(time.RFC3339Nano, split[0])
 			if err != nil {
-				promLineParsingErr.WithLabelValues(baseName).Add(1)
+				promRecordPrefixParsingErr.WithLabelValues(baseName).Add(1)
 				s.l.Error("parsing line time", zap.Error(err), zap.String("file", name))
 			}
 		}
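
The strings.SplitN(line.String, " ", 4) prefix parse assumes the kubelet/CRI container log format, where each line reads "<RFC3339Nano timestamp> <stdout|stderr> <P|F> <message>". A standalone sketch of the same parse (example line assumed):

    package main

    import (
        "fmt"
        "strings"
        "time"
    )

    func main() {
        line := "2022-11-13T09:14:59.123456789Z stderr F oom-killer invoked"
        split := strings.SplitN(line, " ", 4)
        if len(split) != 4 {
            fmt.Println("unexpected prefix; keeping raw line")
            return
        }
        collectTime, err := time.Parse(time.RFC3339Nano, split[0])
        if err != nil {
            fmt.Println("parsing line time:", err)
            return
        }
        fmt.Println(collectTime.UTC(), split[1], split[2], split[3])
    }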