Skip to content

Commit

Permalink
chore: overhaul logging some more
Browse files Browse the repository at this point in the history
this introduces a trace log level (to which the very low level messages
get sent) and creates more consistency across various packages. Helped
me a lot tracking down some issues.
  • Loading branch information
jonkerj committed Feb 6, 2025
1 parent 3c67fb2 commit 663d58d
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 23 deletions.
11 changes: 9 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ import (
"fmt"
"log/slog"
"os"
"strings"

"github.com/siderolabs/talos-vmtoolsd/internal/talosconnection"
"github.com/siderolabs/talos-vmtoolsd/internal/util"
"github.com/siderolabs/talos-vmtoolsd/internal/version"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -42,6 +44,11 @@ var (
)

func parseLevel(s string) (slog.Level, error) {
// slog does not support trace level logging by default, but is flexible
if strings.ToUpper(s) == "TRACE" {
return util.LogLevelTrace, nil
}

var level slog.Level
err := level.UnmarshalText([]byte(s))
return level, err
Expand All @@ -64,13 +71,13 @@ func setup(cmd *cobra.Command, args []string) error {
// Our spec file passes the secret path and K8s host IP via env vars.
configPath := viper.GetString(flagTalosConfig)
if len(configPath) == 0 {
logger.Error("error: TALOS_CONFIG_PATH is a required path to a Talos configuration file")
logger.Error("a path to a Talos configuration file is required when not connecting to machined")
return errTalosSetupFailed
}

talosNode := viper.GetString(flagTalosNode)
if len(talosNode) == 0 {
logger.Error("error: TALOS_HOST is required to point to a node's internal IP")
logger.Error("you need to specify a Talos node when not connecting to machined")
return errTalosSetupFailed
}

Expand Down
7 changes: 4 additions & 3 deletions cmd/vmtoolsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@ func vmtoolsd(cmd *cobra.Command, args []string) error {
// all ESXi/vSphere nodes and editing talos-vmtoolsd's DaemonSet to run only on those nodes.
if !vmcheck.IsVirtualCPU() {
// NB: We cannot simply exit(0) because DaemonSets are always restarted. TODO: or should we? Restarts get noticed, select{} won't
logger.Info("halting because the current node is not running under ESXi. fair winds!")
logger.Error("halting because the current node is not running under ESXi. fair winds!")
select {}
}

// Wires up VMware Toolbox commands to Talos apid.
vmlog := vmwlogger.New(logger.With("module", "vmw-guestinfo"))
vmguestmsg.DefaultLogger = vmlog
rpcIn, rpcOut := nanotoolbox.NewHypervisorChannelPair()
svc := nanotoolbox.NewService(logger.With("module", "nanotoolbox"), rpcIn, rpcOut)

rpcIn, rpcOut := nanotoolbox.NewHypervisorChannelPair(logger.With("module", "nanotoolbox.channel"))
svc := nanotoolbox.NewService(logger.With("module", "nanotoolbox.service"), rpcIn, rpcOut)

integrations := []integration.Integration{
integration.NewPower(logger.With("integration", "power"), api, svc),
Expand Down
7 changes: 5 additions & 2 deletions internal/integration/guestinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ type GuestInfo struct {
}

func NewGuestInfo(logger *slog.Logger, talos *talosconnection.TalosAPIConnection, service *nanotoolbox.Service) *GuestInfo {
logger.Debug("initializing")

g := &GuestInfo{
logger: logger,
talos: talos,
Expand All @@ -73,7 +75,7 @@ func (g *GuestInfo) setGuestInfo(kind guestInfoId, data []byte) {
// NB: intentionally using two spaces as separator to match open-vm-tools
l := g.logger.With("guest_info_kind", kind.Name())
msg := append([]byte(fmt.Sprintf("SetGuestInfo %d ", kind)), data...)
l.Debug("setting", "msg", msg)
l.Debug("setting", "msg", string(msg))
if _, err := g.service.Request(msg); err != nil {
l.Error("error sending guest info", "err", err)
}
Expand All @@ -93,7 +95,7 @@ func (g *GuestInfo) infoSet(kind string, value string) {
// setString wraps setGuestInfo, but for strings
func (g *GuestInfo) setString(kind guestInfoId, fn func() string) {
s := fn()
g.logger.With("guest_info_kind", kind.Name).Debug("setting guestinfo string", "str", s)
g.logger.With("guest_info_kind", kind.Name()).Debug("setting guestinfo string", "str", s)
g.setGuestInfo(kind, []byte(s))
}

Expand Down Expand Up @@ -190,6 +192,7 @@ func (g *GuestInfo) primaryIP() string {

// Register registers all handlers and capabilities
func (g *GuestInfo) Register() {
g.logger.Debug("registering")
// reset handlers
g.service.RegisterResetHandler(func() {
g.setDNSName()
Expand Down
3 changes: 3 additions & 0 deletions internal/integration/power.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ func noop() error {
}

func NewPower(logger *slog.Logger, talos *talosconnection.TalosAPIConnection, service *nanotoolbox.Service) *Power {
logger.Debug("initializing")

p := &Power{
logger: logger,
talos: talos,
Expand Down Expand Up @@ -83,6 +85,7 @@ func (p *Power) makePowerHandler(ps powerState, powerFn powerHandler) (string, n
}

func (p *Power) Register() {
p.logger.Debug("registering")
p.service.AddCapability("tools.capability.statechange")
p.service.AddCapability("tools.capability.softpowerop_retry")
p.service.RegisterCommandHandler(p.makePowerHandler(pwrHaltId, p.talos.Shutdown))
Expand Down
3 changes: 3 additions & 0 deletions internal/integration/vix.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type VIX struct {
func NewVIX(logger *slog.Logger, talos *talosconnection.TalosAPIConnection, service *nanotoolbox.Service) *VIX {
vs := vixserver.New(logger.With("module", "vixserver"), talos)

logger.Debug("initializing")

vi := &VIX{
logger: logger,
talos: talos,
Expand All @@ -57,6 +59,7 @@ func (v *VIX) handleVIXCommand(data []byte) ([]byte, error) {
}

func (v *VIX) Register() {
v.logger.Debug("registering")
v.service.RegisterCommandHandler(vixCommand, v.handleVIXCommand)
v.vixserver.RegisterCommand(VixToolsFeatureSupportGetHandleState, v.getToolState)
}
Expand Down
4 changes: 2 additions & 2 deletions internal/talosconnection/getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (c *TalosAPIConnection) Uptime() int {
return 0
}

return int(time.Since(time.Unix(int64(resp.Messages[0].GetBootTime()), 0)).Round(time.Second))
return int(time.Since(time.Unix(int64(resp.Messages[0].GetBootTime()), 0)).Round(time.Second).Seconds())
}

type NetInterface struct {
Expand Down Expand Up @@ -109,7 +109,7 @@ func (c *TalosAPIConnection) NetInterfaces() (result []NetInterface) {
}

for _, addr := range addrMap[intf.Name] {
intf.Addrs = append(intf.Addrs, addr.Address.String())
intf.Addrs = append(intf.Addrs, addr.Address.Addr().String())
}

result = append(result, intf)
Expand Down
18 changes: 18 additions & 0 deletions internal/util/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: Copyright (c) 2020 Oliver Kuckertz, Siderolabs and Equinix
// SPDX-License-Identifier: Apache-2.0

package util

import (
"context"
"log/slog"
)

// log/slog does not implement trace logging by default, but is flexible
const (
LogLevelTrace = slog.Level(-8)
)

func TraceLog(l *slog.Logger, msg string, args ...any) {
l.Log(context.Background(), LogLevelTrace, msg, args...)
}
17 changes: 14 additions & 3 deletions pkg/nanotoolbox/channel.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"bytes"
"errors"
"fmt"
"log/slog"

"github.com/vmware/vmw-guestinfo/message"
"github.com/vmware/vmw-guestinfo/vmcheck"
Expand Down Expand Up @@ -68,11 +69,13 @@ func (c *ChannelOut) Request(request []byte) ([]byte, error) {

type hypervisorChannel struct { //nolint:govet
protocol uint32
logger *slog.Logger

*message.Channel
}

func (b *hypervisorChannel) Start() error {
b.logger.Debug("starting")
if !vmcheck.IsVirtualCPU() {
return ErrNotVirtualWorld
}
Expand All @@ -88,6 +91,8 @@ func (b *hypervisorChannel) Start() error {
}

func (b *hypervisorChannel) Stop() error {
b.logger.Debug("stopping")

if b.Channel == nil {
return nil
}
Expand All @@ -100,9 +105,15 @@ func (b *hypervisorChannel) Stop() error {
}

// NewHypervisorChannelPair returns a pair of channels for communicating with the vmx.
func NewHypervisorChannelPair() (Channel, Channel) {
in := &hypervisorChannel{protocol: tcloProtocol}
out := &hypervisorChannel{protocol: rpciProtocol}
func NewHypervisorChannelPair(logger *slog.Logger) (Channel, Channel) {
in := &hypervisorChannel{
protocol: tcloProtocol,
logger: logger.With("dir", "in"),
}
out := &hypervisorChannel{
protocol: rpciProtocol,
logger: logger.With("dir", "out"),
}

return in, out
}
32 changes: 22 additions & 10 deletions pkg/nanotoolbox/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"log/slog"
"sync"
"time"

"github.com/siderolabs/talos-vmtoolsd/internal/util"
)

const (
Expand All @@ -28,8 +30,8 @@ const (

// Service receives and dispatches incoming RPC requests from the vmx.
type Service struct { //nolint:govet
Log *slog.Logger
out *ChannelOut
logger *slog.Logger
out *ChannelOut

in Channel
name string
Expand All @@ -47,9 +49,9 @@ type Service struct { //nolint:govet
// NewService initializes a Service instance.
func NewService(log *slog.Logger, rpcIn Channel, rpcOut Channel) *Service {
s := &Service{
Log: log,
out: &ChannelOut{rpcOut},
in: rpcIn,
logger: log,
out: &ChannelOut{rpcOut},
in: rpcIn,

name: "toolbox", // same name used by vmtoolsd
wg: new(sync.WaitGroup),
Expand All @@ -72,6 +74,7 @@ func NewService(log *slog.Logger, rpcIn Channel, rpcOut Channel) *Service {

// wrap ChannelOut.Request for demarcation/protection
func (s *Service) Request(request []byte) ([]byte, error) {
util.TraceLog(s.logger, "requesting", "request", request)
return s.out.Request(request)
}

Expand Down Expand Up @@ -107,10 +110,12 @@ func (s *Service) startChannel() error {

func (s *Service) checkReset() error {
if s.rpcError {
s.logger.Warn("resetting because of rpc error", "err", s.rpcError)
s.stopChannel()

err := s.startChannel()
if err != nil {
s.logger.Error("error restarting channel", "err", err)
s.delay = resetDelay

return err
Expand Down Expand Up @@ -153,21 +158,26 @@ func (s *Service) Start() error {
}

err = s.in.Send(response)
util.TraceLog(s.logger, "send", "err", err, "response", string(response))
response = nil

if err != nil {
s.logger.Warn("send failed")
s.delay = resetDelay
s.rpcError = true

continue
}

request, _ := s.in.Receive() //nolint:errcheck
util.TraceLog(s.logger, "received request", "request", string(request))

if len(request) > 0 {
response = s.Dispatch(request)
util.TraceLog(s.logger, "response from dispatch", "request", string(request), "response", string(response))
s.delay = 0
} else {
util.TraceLog(s.logger, "backing off")
s.backoff()
}
}
Expand Down Expand Up @@ -195,40 +205,42 @@ type OptionHandler func(key, value string)

// AddCapability adds a capability to the Service.
func (s *Service) AddCapability(name string) {
s.logger.Debug("registering capability", "capability", name)
s.capabilities = append(s.capabilities, name)
}

// RegisterCommandHandler adds a CommandHandler to the Service.
func (s *Service) RegisterCommandHandler(name string, handler CommandHandler) {
s.logger.Debug("registering command handler", "command", name)
s.commandHandlers[name] = handler
}

// RegisterOptionHandler adds an OptionHandler to the Service.
func (s *Service) RegisterOptionHandler(key string, handler OptionHandler) {
s.logger.Debug("registering command handler", "option", key)
s.optionHandlers[key] = handler
}

// RegisterResetHandler adds a function to be called when the Service is reset.
func (s *Service) RegisterResetHandler(f func()) {
s.logger.Debug("registering a reset handler")
s.resetHandlers = append(s.resetHandlers, f)
}

// Dispatch an incoming RPC request to a CommandHandler.
func (s *Service) Dispatch(request []byte) []byte {
s.logger.Debug("dispatching", "request", string(request))
msg := bytes.SplitN(request, []byte{' '}, 2)
name := msg[0]

// Trim NULL byte terminator
name = bytes.TrimRight(name, "\x00")
l := s.Log.With("handler_name", string(name))

l.Debug("incoming RPC request")
l := s.logger.With("handler_kind", string(name))

handler, ok := s.commandHandlers[string(name)]

if !ok {
l.Debug("unknown command")

l.Debug("unknown command kind")
return []byte("ERROR Unknown Command")
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/vixserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (v *VIXCommandServer) Dispatch(data []byte) ([]byte, error) {

handler, ok := v.registry[header.OpCode]
if !ok {
l.Warn("unhandled command")
l.Debug("unhandled command") // debug level, as ESX issues way more commands than we care for

return createVIXCommandResult(header, vix.UnrecognizedCommandInGuest, nil, nil), nil
}
Expand All @@ -120,5 +120,6 @@ func (v *VIXCommandServer) Dispatch(data []byte) ([]byte, error) {
}

func (v *VIXCommandServer) RegisterCommand(command uint32, handler vixCommandHandler) {
v.logger.Info("registering vix command", "command", command)
v.registry[command] = handler
}

0 comments on commit 663d58d

Please sign in to comment.