refactor: use py mongo struct + stdlib log

+ restructure project
This commit is contained in:
2022-11-09 14:19:56 +02:00
parent bd667c0fc1
commit 69e0230949
146 changed files with 1731 additions and 14066 deletions

View File

@@ -0,0 +1,29 @@
package mongoStruct
import (
"time"
"go.mongodb.org/mongo-driver/bson"
)
// The bsonLookup* helpers below read one (possibly nested) key from a raw
// document and drop the "ok" flag of the typed accessor: callers get the
// type's zero value instead of an error indication.

// bsonLookupBoolean looks up key in b and returns it as a bool.
// It returns false whenever BooleanOK reports that the value could not be
// read as a boolean.
func bsonLookupBoolean(b *bson.Raw, key ...string) bool {
	if val, ok := b.Lookup(key...).BooleanOK(); ok {
		return val
	}
	return false
}
// bsonLookupStringValue looks up key in b and returns it as a string.
// It returns "" whenever StringValueOK reports that the value could not be
// read as a string.
func bsonLookupStringValue(b *bson.Raw, key ...string) string {
	if val, ok := b.Lookup(key...).StringValueOK(); ok {
		return val
	}
	return ""
}
// bsonLookupInt64 looks up key in b and returns it as an int64.
// It returns 0 whenever Int64OK reports that the value could not be read as
// an int64.
func bsonLookupInt64(b *bson.Raw, key ...string) int64 {
	if val, ok := b.Lookup(key...).Int64OK(); ok {
		return val
	}
	return 0
}
// bsonLookupTime looks up key in b and returns it as a time.Time.
// It returns the zero time.Time whenever TimeOK reports that the value could
// not be read as a time.
func bsonLookupTime(b *bson.Raw, key ...string) time.Time {
	if val, ok := b.Lookup(key...).TimeOK(); ok {
		return val
	}
	return time.Time{}
}

60
pkg/mongoStruct/gather.go Normal file
View File

@@ -0,0 +1,60 @@
package mongoStruct
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
)
// ParseLogName extracts the pod, namespace, container name and container id
// from a kubelet container log file name of the form
//
//	<pod_name>_<pod_namespace>_<container_name>-<container_id>.log
//
// See https://github.com/kubernetes/design-proposals-archive/blob/8da1442ea29adccea40693357d04727127e045ed/node/kubelet-cri-logging.md
//
// Only the base name of name is inspected, so a full path may be passed.
//
// ok is false when a separator is missing or the name does not end in ".log".
// In that case m may be partially populated and must not be relied upon.
func ParseLogName(name string) (m KubeInfo, ok bool) {
	name = filepath.Base(name)

	var found bool
	if m.Pod, name, found = strings.Cut(name, "_"); !found {
		return m, false
	}
	if m.Namespace, name, found = strings.Cut(name, "_"); !found {
		return m, false
	}

	// Container names may themselves contain '-' (e.g. "kube-proxy"), so the
	// container id is whatever follows the LAST '-'. This assumes the id has
	// no '-' in it (runtime ids are hex strings).
	sep := strings.LastIndexByte(name, '-')
	if sep < 0 {
		return m, false
	}
	m.ContainerName, name = name[:sep], name[sep+1:]

	m.ContainerId = strings.TrimSuffix(name, ".log")
	if !strings.HasSuffix(name, ".log") {
		// Explicit false: a naked return here would leak ok == true.
		return m, false
	}
	return m, true
}
// Populate fills in h.Name, h.Id and h.Arch.
//
// The name is nodeName, or the OS hostname when nodeName is empty; the id is
// the content of /etc/machine-id; the architecture is runtime.GOARCH. Name and
// id are stored with surrounding whitespace trimmed.
//
// An error is returned when the hostname or the machine id cannot be read.
// If reading the machine id fails, h.Name has already been set.
func (h *HostInfo) Populate(nodeName string) (err error) {
	name := nodeName
	if name == "" {
		if name, err = os.Hostname(); err != nil {
			return fmt.Errorf("getting hostname: %w", err)
		}
	}
	h.Name = strings.TrimSpace(name)

	machineID, readErr := os.ReadFile("/etc/machine-id")
	if readErr != nil {
		return fmt.Errorf("getting machineId: %w", readErr)
	}

	h.Id, h.Arch = strings.TrimSpace(string(machineID)), runtime.GOARCH
	return nil
}

View File

@@ -0,0 +1,152 @@
package mongoStruct
import (
"context"
"time"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/mongo"
)
// InitializeIndexes creates a compound index on col: file path ascending,
// then offset descending.
//
// ctx is passed to the driver call as-is. Per the original author's note,
// calling this repeatedly does not create duplicate indexes.
func InitializeIndexes(ctx context.Context, col *mongo.Collection) error {
	model := mongo.IndexModel{
		Keys: bson.D{
			{Key: RecordKeyFilePath, Value: 1},
			{Key: RecordKeyOffset, Value: -1},
		},
	}

	_, err := col.Indexes().CreateOne(ctx, model)
	return err
}
// NOTE: when editing these types, also edit everything else in this file
// (key constants, ToBson, ...) so the structs and the BSON layout stay in sync.

// Record is a single log line together with the file it came from and the
// metadata parsed from it.
type Record struct {
	File

	Offset int64  // end, of last line
	String string // the line itself

	ParsedMetadata
	ParsedContent // TODO: not implemented

	// Original note: "added by ToBson()".
	// NOTE(review): the ToBson in this file writes time.Now() straight into
	// the document and does not appear to set this field. Confirm whether
	// it is still needed.
	timeShip time.Time
}

// ParsedMetadata holds the per-line metadata: a timestamp and whether the
// line was written to stderr.
type ParsedMetadata struct {
	TimeKubernetes time.Time
	StdErr         bool
}

// ParsedContent holds data parsed from the line's content. Record marks it as
// not implemented yet.
type ParsedContent struct {
	Content      any
	TimeUpstream time.Time
}

// HostInfo identifies the host a log file lives on.
type HostInfo struct {
	Id   string
	Name string
	Arch string
}

// File describes a log file: its host, its path and the Kubernetes
// identifiers associated with it.
type File struct {
	Host *HostInfo
	Path string // absolute
	KubeInfo
}

// KubeInfo holds the Kubernetes identifiers of a container log.
type KubeInfo struct {
	ContainerName string
	ContainerId   string // unused
	Namespace     string
	Pod           string
}
// Dotted paths to nested document fields, assembled from the package's key
// constants. They are exported so that code outside the package can refer to
// these fields in MongoDB commands without hard-coding the strings.
const (
// used outside package for mongo commands
// "host.id"
RecordKeyHostId = recordKeyHost + "." + recordKeyId
// "log.file.path"
RecordKeyFilePath = recordKeyLog + "." + recordKeyFile + "." + recordKeygenericPath
// "log.offset"
RecordKeyOffset = recordKeyLog + "." + recordKeyOffset
)
// Don't use direct strings in bson types. Use the constants as keys.
// This ensures keys (and subkeys) are consistent within the package, and by consumers of it.
//
// Generic sub-keys that are reused under more than one parent document.
const (
// sub-key for a name, e.g. under the container and pod documents
recordKeygenericName = "name"
// sub-key for a path, e.g. under the file document
recordKeygenericPath = "path"
)
// Keys of the document produced by ToBson. These strings are the stored
// schema: changing a value changes what is written to (and looked up from)
// the database.
const (
// the log line itself
recordKeyString = "message"
// "log" sub-document: file and offset
recordKeyLog = "log"
recordKeyFile = "file"
recordKeyOffset = "offset"
// recordKeyLevel = "level"
// "host" sub-document: id, name and architecture
recordKeyHost = "host"
recordKeyId = "id"
// same value as recordKeygenericName
recordKeyName = "name"
recordKeyArch = "architecture"
// "kubernetes" sub-document: container, namespace and pod
recordKeyKubernetes = "kubernetes"
recordKeyContainer = "container"
recordKeyNamespace = "namespace"
recordKeyPod = "pod"
// NOTE(review): the key is literally "stderr", while ToBson stores the
// string "stderr" or "stdout" under it
recordKeyStream = "stderr"
// NOTE(review): not referenced in the visible part of this file
recordKeyContainerTime = "container_time"
// time at which ToBson was called
recordKeyShipTime = "ship_time"
// "event" sub-document: timestamps
recordKeyEvent = "event"
recordKeyTimeUpstream = "created"
recordKeyTimeKubernetes = "ingested"
// NOTE(review): not referenced in the visible part of this file
recordKeyTimeMower = "@timestamp"
)
// ToBson converts the record into the document that is stored in MongoDB.
//
// The document is assembled by hand instead of through marshalling, since
// marshalling is <0.1x performance (original author's note).
//
// The ship time is set to time.Now() at the moment of the call. l.File.Host
// must be non-nil.
func (l *Record) ToBson() bson.M {
	stream := "stdout"
	if l.StdErr {
		stream = "stderr"
	}

	logDoc := bson.M{
		recordKeyFile:   bson.M{recordKeygenericPath: l.File.Path},
		recordKeyOffset: l.Offset,
		// recordKeyLevel: , //TODO: ECS
	}

	kubeDoc := bson.M{
		recordKeyContainer: bson.M{recordKeygenericName: l.File.ContainerName},
		recordKeyNamespace: l.File.Namespace,
		recordKeyPod:       bson.M{recordKeygenericName: l.File.Pod},
	}

	host := l.File.Host
	hostDoc := bson.M{
		recordKeyId:   host.Id,
		recordKeyName: host.Name,
		recordKeyArch: host.Arch,
	}

	eventDoc := bson.M{
		recordKeyTimeUpstream:   l.TimeUpstream,
		recordKeyTimeKubernetes: l.TimeKubernetes,
	}

	doc := bson.M{}
	doc[recordKeyString] = l.String
	doc[recordKeyLog] = logDoc
	doc[recordKeyKubernetes] = kubeDoc
	doc[recordKeyHost] = hostDoc
	doc[recordKeyStream] = stream
	doc[recordKeyEvent] = eventDoc
	doc[recordKeyShipTime] = time.Now()
	return doc
}
// RecordOffsetFromBson returns the offset stored under the log sub-document
// of b (the field ToBson writes Record.Offset to). The result is whatever
// bsonLookupInt64 yields for that key path.
func RecordOffsetFromBson(b *bson.Raw) int64 {
	offset := bsonLookupInt64(b, recordKeyLog, recordKeyOffset)
	return offset
}