2022-10-11 07:55:22 +00:00
|
|
|
package logmower
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
2022-11-06 00:16:54 +00:00
|
|
|
"net/url"
|
2022-10-11 07:55:22 +00:00
|
|
|
"os"
|
|
|
|
"os/signal"
|
2022-11-06 01:43:18 +00:00
|
|
|
"path/filepath"
|
2022-10-11 07:55:22 +00:00
|
|
|
"runtime"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/fsnotify/fsnotify"
|
|
|
|
prom "github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
|
|
"github.com/urfave/cli/v2"
|
|
|
|
"go.elastic.co/ecszap"
|
|
|
|
"go.mongodb.org/mongo-driver/mongo"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
2022-11-06 15:02:49 +00:00
|
|
|
MachineId = "/etc/machine-id"
|
|
|
|
MongoTimeout = 10 * time.Second
|
|
|
|
PrometheusPrefix = "logmower-shipper"
|
2022-10-11 07:55:22 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// wrapper to force copying before use
|
|
|
|
func defaultBackoff() wait.Backoff {
|
|
|
|
return wait.Backoff{
|
|
|
|
Duration: 2 * time.Second,
|
|
|
|
Factor: 1.5,
|
|
|
|
Jitter: 0.1,
|
|
|
|
Cap: 30 * time.Second,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func mongoTimeoutCtx(ctx context.Context) context.Context {
|
2022-11-06 15:02:49 +00:00
|
|
|
ctx, _ = context.WithTimeout(ctx, MongoTimeout) //nolint:lostcancel (cancelled by mongo, should be bug on them //TODO)
|
2022-10-11 07:55:22 +00:00
|
|
|
return ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
var App = &cli.App{
|
2022-11-03 10:31:01 +00:00
|
|
|
Name: "logmower-shipper",
|
2022-10-11 07:55:22 +00:00
|
|
|
Version: "1.0.0",
|
|
|
|
Authors: []*cli.Author{{Name: "jtagcat"}},
|
|
|
|
|
|
|
|
Description: "Collect and ship kubernetes logs",
|
|
|
|
// Usage: "rubykana <input>",
|
2022-11-03 10:42:59 +00:00
|
|
|
// TODO: #2: yaml
|
2022-10-11 07:55:22 +00:00
|
|
|
Flags: []cli.Flag{
|
|
|
|
&cli.BoolFlag{Name: "dry-run", Usage: "Do not write to database"}, // TODO:
|
|
|
|
&cli.StringFlag{Name: "log-directory", Usage: "Directory to watch for logs", Value: "/var/log/containers"},
|
|
|
|
&cli.IntFlag{Name: "max-record-size", Value: 128 * 1024, Usage: "Maximum record size in bytes"}, // TODO:
|
|
|
|
&cli.BoolFlag{Name: "normalize-log-level", Usage: "Normalize log.level values to Syslog defined keywords"}, // TODO:
|
2022-11-05 23:45:19 +00:00
|
|
|
&cli.BoolFlag{Name: "delete-after-read", Usage: "Delete log file when it is synced to mongo, and no new lines to read", Value: false},
|
2022-10-11 07:55:22 +00:00
|
|
|
// &cli.BoolFlag{Name: "parse-json"}, //TODO:
|
|
|
|
&cli.StringFlag{Category: "k8s metadata", Name: "pod-namespace", EnvVars: []string{"KUBE_POD_NAMESPACE"}}, // TODO:
|
|
|
|
&cli.StringFlag{Category: "k8s metadata", Name: "node-name", EnvVars: []string{"KUBE_NODE_NAME"}, Required: true},
|
2022-11-06 00:16:54 +00:00
|
|
|
&cli.StringFlag{Category: "secrets", Name: "mongo-uri", EnvVars: []string{"MONGO_URI"}, Usage: "mongodb://foo:bar@host:27017/database", Required: true},
|
2022-10-11 07:55:22 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
Action: func(ctx *cli.Context) error {
|
|
|
|
ctx.Context, _ = signal.NotifyContext(ctx.Context, os.Interrupt) // TODO: test
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
|
|
|
// init logger, ECS schema
|
|
|
|
core := ecszap.NewCore(ecszap.NewDefaultEncoderConfig(),
|
|
|
|
os.Stderr, zap.WarnLevel)
|
|
|
|
l := zap.New(core, zap.AddCaller())
|
|
|
|
|
|
|
|
l.Info("logmower starting", zap.String("version", ctx.App.Version))
|
|
|
|
|
|
|
|
var (
|
|
|
|
promWatcherOnline = promauto.NewGauge(prom.GaugeOpts{
|
2022-11-06 15:02:49 +00:00
|
|
|
Namespace: PrometheusPrefix,
|
2022-10-11 07:55:22 +00:00
|
|
|
Subsystem: "watcher",
|
|
|
|
Name: "online",
|
|
|
|
Help: "1 if initialized, and directory watcher has been engaged successfully",
|
|
|
|
})
|
|
|
|
|
|
|
|
promErrWatching = promauto.NewCounter(prom.CounterOpts{
|
2022-11-06 15:02:49 +00:00
|
|
|
Namespace: PrometheusPrefix,
|
2022-10-11 07:55:22 +00:00
|
|
|
Subsystem: "watcher",
|
2022-11-06 13:57:05 +00:00
|
|
|
Name: "errors",
|
2022-10-11 07:55:22 +00:00
|
|
|
Help: "Error in logmower watching log files",
|
|
|
|
})
|
|
|
|
promFilesRead = promauto.NewCounter(prom.CounterOpts{
|
2022-11-06 15:02:49 +00:00
|
|
|
Namespace: PrometheusPrefix,
|
2022-10-11 07:55:22 +00:00
|
|
|
Subsystem: "watcher",
|
2022-11-06 13:57:05 +00:00
|
|
|
Name: "seen_files",
|
2022-10-11 07:55:22 +00:00
|
|
|
Help: "Number of tracked log files",
|
|
|
|
})
|
|
|
|
)
|
|
|
|
go func() {
|
|
|
|
l.Info("/metrics starting", zap.Int("port", 2112))
|
|
|
|
http.Handle("/metrics", promhttp.Handler())
|
|
|
|
|
|
|
|
if err := http.ListenAndServe(":2112", nil); !errors.Is(err, http.ErrServerClosed) {
|
|
|
|
l.Fatal("failed to serve /metrics", zap.Error(err))
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2022-11-05 23:21:30 +00:00
|
|
|
state := submitter{l: l}
|
2022-10-11 07:55:22 +00:00
|
|
|
|
|
|
|
dbOpt := mongoMonitoredClientOptions(l).ApplyURI(ctx.String("mongo-uri"))
|
|
|
|
|
|
|
|
dbClient, err := mongo.Connect(mongoTimeoutCtx(ctx.Context))
|
|
|
|
if err != nil {
|
|
|
|
l.Fatal("connecting to mongo", zap.String("uri", dbOpt.GetURI()), zap.Error(err))
|
|
|
|
}
|
|
|
|
|
2022-11-06 00:16:54 +00:00
|
|
|
uriParsed, err := url.ParseRequestURI(ctx.String("mongo-uri"))
|
|
|
|
if err != nil {
|
|
|
|
l.Fatal("parsing URI for mongo database name", zap.Error(err))
|
|
|
|
}
|
2022-11-06 01:43:18 +00:00
|
|
|
|
|
|
|
uriParsed.Path = uriParsed.Path[1:] // remove leading slash
|
2022-11-06 00:16:54 +00:00
|
|
|
if uriParsed.Path == "" {
|
|
|
|
l.Fatal("mongo database name must be set in mongo URI")
|
|
|
|
}
|
|
|
|
|
|
|
|
state.db = dbClient.Database(uriParsed.Path).Collection("logs")
|
2022-10-11 07:55:22 +00:00
|
|
|
|
2022-11-06 14:52:22 +00:00
|
|
|
err = initializeIndexes(ctx.Context, state.db)
|
|
|
|
if err != nil {
|
|
|
|
l.Fatal("initializing indexes", zap.Error(err))
|
|
|
|
}
|
|
|
|
|
2022-10-11 07:55:22 +00:00
|
|
|
state.hostInfo, err = getHostInfo(ctx.String("node-name"))
|
|
|
|
if err != nil {
|
|
|
|
l.Fatal("gathering host info", zap.Error(err))
|
|
|
|
}
|
|
|
|
|
|
|
|
watcher, err := fsnotify.NewWatcher()
|
|
|
|
if err != nil {
|
|
|
|
l.Fatal("setting up watcher", zap.Error(err))
|
|
|
|
}
|
|
|
|
|
|
|
|
logDir := ctx.String("log-directory")
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer wg.Done()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Context.Done():
|
|
|
|
return
|
|
|
|
case event, ok := <-watcher.Events:
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
2022-11-03 10:42:59 +00:00
|
|
|
// TODO: #1: || if not in filterset
|
2022-10-11 07:55:22 +00:00
|
|
|
if event.Op != fsnotify.Create {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
promFilesRead.Add(1)
|
2022-11-06 01:43:18 +00:00
|
|
|
l.Debug("digesting new file", zap.String("name", event.Name))
|
2022-10-11 07:55:22 +00:00
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
2022-11-06 01:43:18 +00:00
|
|
|
state.shipFile(ctx.Context, event.Name, ctx.Bool("delete-after-read"))
|
2022-10-11 07:55:22 +00:00
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
|
|
|
|
case err, ok := <-watcher.Errors:
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
promErrWatching.Add(1)
|
|
|
|
l.Error("while watching log dir events", zap.Error(err))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2022-11-06 02:04:32 +00:00
|
|
|
// simulate create events to pick up files already created
|
|
|
|
err = simulateInitialCreate(logDir, watcher.Events)
|
|
|
|
if err != nil {
|
|
|
|
promErrWatching.Add(1)
|
|
|
|
l.Fatal("listing initial log directory", zap.String("name", logDir), zap.Error(err))
|
|
|
|
}
|
2022-11-06 01:43:18 +00:00
|
|
|
|
2022-10-11 07:55:22 +00:00
|
|
|
err = watcher.Add(logDir)
|
|
|
|
if err != nil {
|
|
|
|
promErrWatching.Add(1)
|
|
|
|
l.Fatal("watching log directory", zap.String("name", logDir), zap.Error(err))
|
|
|
|
}
|
|
|
|
|
|
|
|
promWatcherOnline.Set(1)
|
|
|
|
|
|
|
|
// waiting indefinitely for interrupt
|
|
|
|
wg.Wait() // wait for watch and file processors to cleanup
|
|
|
|
|
2022-11-05 23:21:30 +00:00
|
|
|
return errAppend(watcher.Close(), ctx.Err())
|
2022-10-11 07:55:22 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
type HostInfo struct {
|
|
|
|
id string
|
|
|
|
name string
|
|
|
|
arch string
|
|
|
|
}
|
|
|
|
|
|
|
|
func getHostInfo(nodeName string) (h HostInfo, err error) {
|
|
|
|
if nodeName == "" {
|
|
|
|
nodeName, err = os.Hostname()
|
|
|
|
if err != nil {
|
|
|
|
err = fmt.Errorf("name: hostname: %w", err) // don't exit early
|
|
|
|
}
|
|
|
|
}
|
|
|
|
h.name = strings.TrimSpace(nodeName)
|
|
|
|
|
2022-11-06 15:02:49 +00:00
|
|
|
id, errL := os.ReadFile(MachineId)
|
2022-11-06 00:17:00 +00:00
|
|
|
if errL != nil {
|
2022-11-06 00:16:54 +00:00
|
|
|
err = errAppend(err, fmt.Errorf("id: %w", errL))
|
2022-11-06 00:17:00 +00:00
|
|
|
}
|
2022-10-11 07:55:22 +00:00
|
|
|
|
|
|
|
h.id = strings.TrimSpace(string(id))
|
|
|
|
|
|
|
|
h.arch = runtime.GOARCH
|
|
|
|
|
|
|
|
return h, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func errAppend(a, b error) error {
|
|
|
|
if a == nil {
|
|
|
|
return b
|
|
|
|
}
|
|
|
|
if b == nil {
|
|
|
|
return a
|
|
|
|
}
|
|
|
|
return fmt.Errorf("%e; %e", a, b)
|
|
|
|
}
|
2022-11-06 01:43:18 +00:00
|
|
|
|
|
|
|
type logMeta struct {
|
|
|
|
podName string
|
|
|
|
podNamespace string
|
|
|
|
containerName string
|
|
|
|
containerId string
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseLogName(name string) (m logMeta, ok bool) {
|
|
|
|
name = filepath.Base(name)
|
|
|
|
|
|
|
|
// https://github.com/kubernetes/design-proposals-archive/blob/8da1442ea29adccea40693357d04727127e045ed/node/kubelet-cri-logging.md
|
|
|
|
// <pod_name>_<pod_namespace>_<container_name>-<container_id>.log`
|
|
|
|
|
|
|
|
m.podName, name, ok = strings.Cut(name, "_")
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
m.podNamespace, name, ok = strings.Cut(name, "_")
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
m.containerName, name, ok = strings.Cut(name, "-")
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
m.containerId = strings.TrimSuffix(name, ".log")
|
|
|
|
|
|
|
|
return m, true
|
|
|
|
}
|
2022-11-06 02:04:32 +00:00
|
|
|
|
|
|
|
func simulateInitialCreate(dirName string, eventChan chan<- fsnotify.Event) error {
|
|
|
|
dir, err := os.ReadDir(dirName)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, file := range dir {
|
|
|
|
eventChan <- fsnotify.Event{
|
|
|
|
Name: filepath.Join(dirName, file.Name()),
|
|
|
|
Op: fsnotify.Create,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|