Skip to content

Commit

Permalink
Merge pull request #971 from prometheus/superq/fix_exporter_metrics
Browse files Browse the repository at this point in the history
Fix exporter metrics
  • Loading branch information
roidelapluie authored Aug 29, 2023
2 parents ef0c822 + df7ac53 commit 2dee7c4
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 89 deletions.
98 changes: 19 additions & 79 deletions collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,15 @@ import (
"github.com/go-kit/log/level"
"github.com/gosnmp/gosnmp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/prometheus/snmp_exporter/config"
)

const (
namespace = "snmp"
)

var (
// 64-bit float mantissa: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
float64Mantissa uint64 = 9007199254740992
wrapCounters = kingpin.Flag("snmp.wrap-large-counters", "Wrap 64-bit counters to avoid floating point rounding.").Default("true").Bool()
srcAddress = kingpin.Flag("snmp.source-address", "Source address to send snmp from in the format 'address:port' to use when connecting targets. If the port parameter is empty or '0', as in '127.0.0.1:' or '[::1]:0', a source port number is automatically (random) chosen.").Default("").String()

snmpDuration = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "snmp_collection_duration_seconds",
Help: "Duration of collections by the SNMP exporter",
},
[]string{"auth", "module"},
)
)

// Types preceded by an enum with their actual type.
Expand Down Expand Up @@ -89,21 +76,13 @@ func listToOid(l []int) string {
return strings.Join(result, ".")
}

func InitModuleMetrics(auths map[string]*config.Auth, modules map[string]*config.Module) {
for auth := range auths {
for module := range modules {
snmpDuration.WithLabelValues(auth, module)
}
}
}

type ScrapeResults struct {
pdus []gosnmp.SnmpPDU
packets uint64
retries uint64
}

func ScrapeTarget(ctx context.Context, target string, auth *config.Auth, module *config.Module, logger log.Logger, metrics internalMetrics) (ScrapeResults, error) {
func ScrapeTarget(ctx context.Context, target string, auth *config.Auth, module *config.Module, logger log.Logger, metrics Metrics) (ScrapeResults, error) {
results := ScrapeResults{}
// Set the options.
snmp := gosnmp.GoSNMP{}
Expand All @@ -123,14 +102,14 @@ func ScrapeTarget(ctx context.Context, target string, auth *config.Auth, module
var sent time.Time
snmp.OnSent = func(x *gosnmp.GoSNMP) {
sent = time.Now()
metrics.snmpPackets.Inc()
metrics.SNMPPackets.Inc()
results.packets++
}
snmp.OnRecv = func(x *gosnmp.GoSNMP) {
metrics.snmpDuration.Observe(time.Since(sent).Seconds())
metrics.SNMPDuration.Observe(time.Since(sent).Seconds())
}
snmp.OnRetry = func(x *gosnmp.GoSNMP) {
metrics.snmpRetries.Inc()
metrics.SNMPRetries.Inc()
results.retries++
}

Expand Down Expand Up @@ -271,7 +250,7 @@ func configureTarget(g *gosnmp.GoSNMP, target string) error {
return nil
}

func filterAllowedIndices(logger log.Logger, filter config.DynamicFilter, pdus []gosnmp.SnmpPDU, allowedList []string, metrics internalMetrics) []string {
func filterAllowedIndices(logger log.Logger, filter config.DynamicFilter, pdus []gosnmp.SnmpPDU, allowedList []string, metrics Metrics) []string {
level.Debug(logger).Log("msg", "Evaluating rule for oid", "oid", filter.Oid)
for _, pdu := range pdus {
found := false
Expand Down Expand Up @@ -365,11 +344,12 @@ func buildMetricTree(metrics []*config.Metric) *MetricNode {
return metricTree
}

type internalMetrics struct {
snmpUnexpectedPduType prometheus.Counter
snmpDuration prometheus.Histogram
snmpPackets prometheus.Counter
snmpRetries prometheus.Counter
type Metrics struct {
SNMPCollectionDuration *prometheus.HistogramVec
SNMPUnexpectedPduType prometheus.Counter
SNMPDuration prometheus.Histogram
SNMPPackets prometheus.Counter
SNMPRetries prometheus.Counter
}

type NamedModule struct {
Expand All @@ -391,52 +371,12 @@ type Collector struct {
authName string
modules []*NamedModule
logger log.Logger
metrics internalMetrics
metrics Metrics
concurrency int
}

func newInternalMetrics(reg prometheus.Registerer) internalMetrics {
buckets := prometheus.ExponentialBuckets(0.0001, 2, 15)
snmpUnexpectedPduType := promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "unexpected_pdu_type_total",
Help: "Unexpected Go types in a PDU.",
},
)
snmpDuration := promauto.With(reg).NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Name: "packet_duration_seconds",
Help: "A histogram of latencies for SNMP packets.",
Buckets: buckets,
},
)
snmpPackets := promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "packets_total",
Help: "Number of SNMP packet sent, including retries.",
},
)
snmpRetries := promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "packet_retries_total",
Help: "Number of SNMP packet retries.",
},
)
return internalMetrics{
snmpUnexpectedPduType: snmpUnexpectedPduType,
snmpDuration: snmpDuration,
snmpPackets: snmpPackets,
snmpRetries: snmpRetries,
}
}

func New(ctx context.Context, target, authName string, auth *config.Auth, modules []*NamedModule, logger log.Logger, reg prometheus.Registerer, conc int) *Collector {
internalMetrics := newInternalMetrics(reg)
return &Collector{ctx: ctx, target: target, authName: authName, auth: auth, modules: modules, logger: logger, metrics: internalMetrics, concurrency: conc}
func New(ctx context.Context, target, authName string, auth *config.Auth, modules []*NamedModule, logger log.Logger, metrics Metrics, conc int) *Collector {
return &Collector{ctx: ctx, target: target, authName: authName, auth: auth, modules: modules, logger: logger, metrics: metrics, concurrency: conc}
}

// Describe implements Prometheus.Collector.
Expand Down Expand Up @@ -522,7 +462,7 @@ func (c Collector) Collect(ch chan<- prometheus.Metric) {
c.collect(ch, m)
duration := time.Since(start).Seconds()
level.Debug(logger).Log("msg", "Finished scrape", "duration_seconds", duration)
snmpDuration.WithLabelValues(c.authName, m.name).Observe(duration)
c.metrics.SNMPCollectionDuration.WithLabelValues(c.authName, m.name).Observe(duration)
}
}()
}
Expand Down Expand Up @@ -598,7 +538,7 @@ func parseDateAndTime(pdu *gosnmp.SnmpPDU) (float64, error) {
return float64(t.Unix()), nil
}

func pduToSamples(indexOids []int, pdu *gosnmp.SnmpPDU, metric *config.Metric, oidToPdu map[string]gosnmp.SnmpPDU, logger log.Logger, metrics internalMetrics) []prometheus.Metric {
func pduToSamples(indexOids []int, pdu *gosnmp.SnmpPDU, metric *config.Metric, oidToPdu map[string]gosnmp.SnmpPDU, logger log.Logger, metrics Metrics) []prometheus.Metric {
var err error
// The part of the OID that is the indexes.
labels := indexesToLabels(indexOids, metric, oidToPdu, metrics)
Expand Down Expand Up @@ -799,7 +739,7 @@ func splitOid(oid []int, count int) ([]int, []int) {
}

// This mirrors decodeValue in gosnmp's helper.go.
func pduValueAsString(pdu *gosnmp.SnmpPDU, typ string, metrics internalMetrics) string {
func pduValueAsString(pdu *gosnmp.SnmpPDU, typ string, metrics Metrics) string {
switch pdu.Value.(type) {
case int:
return strconv.Itoa(pdu.Value.(int))
Expand Down Expand Up @@ -837,7 +777,7 @@ func pduValueAsString(pdu *gosnmp.SnmpPDU, typ string, metrics internalMetrics)
return ""
default:
// This shouldn't happen.
metrics.snmpUnexpectedPduType.Inc()
metrics.SNMPUnexpectedPduType.Inc()
return fmt.Sprintf("%s", pdu.Value)
}
}
Expand Down Expand Up @@ -951,7 +891,7 @@ func getPrevOid(oid string) string {
return strings.Join(oids, ".")
}

func indexesToLabels(indexOids []int, metric *config.Metric, oidToPdu map[string]gosnmp.SnmpPDU, metrics internalMetrics) map[string]string {
func indexesToLabels(indexOids []int, metric *config.Metric, oidToPdu map[string]gosnmp.SnmpPDU, metrics Metrics) map[string]string {
labels := map[string]string{}
labelOids := map[string][]int{}

Expand Down
8 changes: 4 additions & 4 deletions collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ func TestPduToSample(t *testing.T) {
}

for _, c := range cases {
metrics := pduToSamples(c.indexOids, c.pdu, c.metric, c.oidToPdu, log.NewNopLogger(), internalMetrics{})
metrics := pduToSamples(c.indexOids, c.pdu, c.metric, c.oidToPdu, log.NewNopLogger(), Metrics{})
metric := &io_prometheus_client.Metric{}
expected := map[string]struct{}{}
for _, e := range c.expectedMetrics {
Expand Down Expand Up @@ -725,7 +725,7 @@ func TestPduValueAsString(t *testing.T) {
},
}
for _, c := range cases {
got := pduValueAsString(c.pdu, c.typ, internalMetrics{})
got := pduValueAsString(c.pdu, c.typ, Metrics{})
if !reflect.DeepEqual(got, c.result) {
t.Errorf("pduValueAsString(%v, %q): got %q, want %q", c.pdu, c.typ, got, c.result)
}
Expand Down Expand Up @@ -1031,7 +1031,7 @@ func TestIndexesToLabels(t *testing.T) {
},
}
for _, c := range cases {
got := indexesToLabels(c.oid, &c.metric, c.oidToPdu, internalMetrics{})
got := indexesToLabels(c.oid, &c.metric, c.oidToPdu, Metrics{})
if !reflect.DeepEqual(got, c.result) {
t.Errorf("indexesToLabels(%v, %v, %v): got %v, want %v", c.oid, c.metric, c.oidToPdu, got, c.result)
}
Expand Down Expand Up @@ -1249,7 +1249,7 @@ func TestFilterAllowedIndices(t *testing.T) {
},
}
for _, c := range cases {
got := filterAllowedIndices(log.NewNopLogger(), c.filter, pdus, c.allowedList, internalMetrics{})
got := filterAllowedIndices(log.NewNopLogger(), c.filter, pdus, c.allowedList, Metrics{})
if !reflect.DeepEqual(got, c.result) {
t.Errorf("filterAllowedIndices(%v): got %v, want %v", c.filter, got, c.result)
}
Expand Down
63 changes: 57 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ import (
"github.com/prometheus/snmp_exporter/config"
)

const (
namespace = "snmp"
)

var (
configFile = kingpin.Flag("config.file", "Path to configuration file.").Default("snmp.yml").Strings()
dryRun = kingpin.Flag("dry-run", "Only verify configuration is valid and exit.").Default("false").Bool()
Expand All @@ -53,10 +57,19 @@ var (
// Metrics about the SNMP exporter itself.
snmpRequestErrors = promauto.NewCounter(
prometheus.CounterOpts{
Name: "snmp_request_errors_total",
Help: "Errors in requests to the SNMP exporter",
Namespace: namespace,
Name: "request_errors_total",
Help: "Errors in requests to the SNMP exporter",
},
)
snmpCollectionDuration = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Name: "collection_duration_seconds",
Help: "Duration of collections by the SNMP exporter",
},
[]string{"auth", "module"},
)
sc = &SafeConfig{
C: &config.Config{},
}
Expand All @@ -68,7 +81,7 @@ const (
configPath = "/config"
)

func handler(w http.ResponseWriter, r *http.Request, logger log.Logger) {
func handler(w http.ResponseWriter, r *http.Request, logger log.Logger, exporterMetrics collector.Metrics) {
query := r.URL.Query()

target := query.Get("target")
Expand Down Expand Up @@ -127,7 +140,7 @@ func handler(w http.ResponseWriter, r *http.Request, logger log.Logger) {
sc.RUnlock()
logger = log.With(logger, "auth", authName, "target", target)
registry := prometheus.NewRegistry()
c := collector.New(r.Context(), target, authName, auth, nmodules, logger, registry, *concurrency)
c := collector.New(r.Context(), target, authName, auth, nmodules, logger, exporterMetrics, *concurrency)
registry.MustRegister(c)
// Delegate http serving to Prometheus client library, which will call collector.Collect.
h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
Expand Down Expand Up @@ -160,7 +173,11 @@ func (sc *SafeConfig) ReloadConfig(configFile []string) (err error) {
sc.Lock()
sc.C = conf
// Initialize metrics.
collector.InitModuleMetrics(sc.C.Auths, sc.C.Modules)
for auth := range sc.C.Auths {
for module := range sc.C.Modules {
snmpCollectionDuration.WithLabelValues(auth, module)
}
}
sc.Unlock()
return nil
}
Expand Down Expand Up @@ -219,10 +236,44 @@ func main() {
}
}()

buckets := prometheus.ExponentialBuckets(0.0001, 2, 15)
exporterMetrics := collector.Metrics{
SNMPCollectionDuration: snmpCollectionDuration,
SNMPUnexpectedPduType: promauto.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "unexpected_pdu_type_total",
Help: "Unexpected Go types in a PDU.",
},
),
SNMPDuration: promauto.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Name: "packet_duration_seconds",
Help: "A histogram of latencies for SNMP packets.",
Buckets: buckets,
},
),
SNMPPackets: promauto.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "packets_total",
Help: "Number of SNMP packet sent, including retries.",
},
),
SNMPRetries: promauto.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "packet_retries_total",
Help: "Number of SNMP packet retries.",
},
),
}

http.Handle(*metricsPath, promhttp.Handler()) // Normal metrics endpoint for SNMP exporter itself.
// Endpoint to do SNMP scrapes.
http.HandleFunc(proberPath, func(w http.ResponseWriter, r *http.Request) {
handler(w, r, logger)
handler(w, r, logger, exporterMetrics)
})
http.HandleFunc("/-/reload", updateConfiguration) // Endpoint to reload configuration.

Expand Down

0 comments on commit 2dee7c4

Please sign in to comment.