Skip to content

Commit

Permalink
Add some new metrics (#56)
Browse files Browse the repository at this point in the history
Adds a few new metrics:

- `otr_redispub_last_command_duration_seconds`
- `otr_oplog_last_entry_staleness_seconds`
- `otr_buffer_available`

This has been baking in staging since before the weekend and seems to be
working nicely.
  • Loading branch information
torywheelwright committed Apr 3, 2024
1 parent cdac03e commit eda10da
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 3 deletions.
2 changes: 1 addition & 1 deletion default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

buildGoModule {
pname = "oplogtoredis";
version = "3.2.0";
version = "3.3.0";
src = builtins.path { path = ./.; };

postInstall = ''
Expand Down
8 changes: 8 additions & 0 deletions lib/oplog/tail.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ var (
ReportInterval: 1 * time.Minute,
},
}, []string{"database", "status"})

// metricLastOplogEntryStaleness is a gauge (exported as
// otr_oplog_last_entry_staleness_seconds) describing how far the timestamp of
// the most recently read oplog entry lags behind this server's clock.
// NOTE(review): the name promises seconds, so every Set call must pass a value
// in seconds (e.g. time.Duration.Seconds()), not a raw time.Duration.
metricLastOplogEntryStaleness = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "otr",
Subsystem: "oplog",
Name: "last_entry_staleness_seconds",
Help: "Gauge recording the difference between this server's clock and the timestamp on the last read oplog entry.",
})
)

func init() {
Expand Down Expand Up @@ -347,6 +354,7 @@ func (tailer *Tailer) unmarshalEntry(rawData bson.Raw) (timestamp *primitive.Tim

metricOplogEntriesBySize.WithLabelValues(database, status).Observe(messageLen)
metricMaxOplogEntryByMinute.Report(messageLen, database, status)
// The gauge is named *_seconds, so report the staleness in seconds.
// float64(time.Duration) would yield the raw nanosecond count instead.
// timestamp.T is interpreted as whole seconds since the Unix epoch.
metricLastOplogEntryStaleness.Set(time.Since(time.Unix(int64(timestamp.T), 0)).Seconds())
}()

if len(entries) > 0 {
Expand Down
12 changes: 11 additions & 1 deletion lib/redispub/publisher.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ var metricTemporaryFailures = promauto.NewCounter(prometheus.CounterOpts{
Help: "Number of failures encountered when trying to send a message. We automatically retry, and only register a permanent failure (in otr_redispub_processed_messages) after 30 failures.",
})

// metricLastCommandDuration is a gauge (exported as
// otr_redispub_last_command_duration_seconds) holding the wall-clock duration,
// in seconds, of the most recent Redis write. publishSingleMessage overwrites
// it after every call, so it only ever reflects the latest attempt.
var metricLastCommandDuration = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "otr",
Subsystem: "redispub",
Name: "last_command_duration_seconds",
Help: "The round trip time in seconds of the most recent write to Redis.",
})

// PublishStream reads Publications from the given channel and publishes them
// to Redis.
func PublishStream(clients []redis.UniversalClient, in <-chan *Publication, opts *PublishOpts, stop <-chan bool) {
Expand Down Expand Up @@ -93,7 +100,7 @@ func PublishStream(clients []redis.UniversalClient, in <-chan *Publication, opts
for i,publishFn := range publishFns {
err := publishSingleMessageWithRetries(p, 30, time.Second, publishFn)
log.Log.Debugw("Published to", "idx", i)


if err != nil {
metricSendFailed.Inc()
Expand Down Expand Up @@ -140,6 +147,8 @@ func publishSingleMessageWithRetries(p *Publication, maxRetries int, sleepTime t
}

// publishSingleMessage makes one attempt to publish p through client by
// running publishDedupe (defined elsewhere in this package; presumably a Redis
// script — confirm) with a background context, and returns whatever error that
// call produced.
//
// As a side effect it stores the elapsed time of the call, in seconds, in
// metricLastCommandDuration.
func publishSingleMessage(p *Publication, client redis.UniversalClient, prefix string, dedupeExpirationSeconds int) error {
// Taken before the call so the recorded duration covers the full round trip.
start := time.Now()

_, err := publishDedupe.Run(
context.Background(),
client,
Expand All @@ -159,6 +168,7 @@ func publishSingleMessage(p *Publication, client redis.UniversalClient, prefix s
strings.Join(p.Channels, "$"), // ARGV[3], channels
).Result()

// Recorded unconditionally: failed calls update the gauge too.
metricLastCommandDuration.Set(time.Since(start).Seconds())
return err
}

Expand Down
14 changes: 13 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (

"github.com/go-redis/redis/v8"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.uber.org/zap"
)
Expand Down Expand Up @@ -76,7 +78,17 @@ func main() {
// and sends them to Redis.
//
// TODO PERF: Use a leaky buffer (https://github.com/tulip/oplogtoredis/issues/2)
redisPubs := make(chan *redispub.Publication, 10000)
bufferSize := 10000
redisPubs := make(chan *redispub.Publication, bufferSize)

// Export the free space remaining in the redisPubs buffer as otr_buffer_available.
// The callback runs whenever the metric is collected, so the value always
// reflects the channel's current fill level. cap() is used so the figure is
// derived from the channel itself rather than a separately tracked size.
promauto.NewGaugeFunc(prometheus.GaugeOpts{
	Namespace: "otr",
	Name:      "buffer_available",
	Help:      "Gauge indicating the available space in the buffer of oplog entries waiting to be written to redis.",
}, func() float64 {
	return float64(cap(redisPubs) - len(redisPubs))
})

waitGroup := sync.WaitGroup{}

stopOplogTail := make(chan bool)
Expand Down

0 comments on commit eda10da

Please sign in to comment.