Skip to content

Commit 49b882a

Browse files
committed
chore(websocket): logging and metrics improvement (jetkvm#347)
* chore(websocket): only show warning if websocket is closed abnormally * chore(websocket): add counter for ping requests received
1 parent b9b0c33 commit 49b882a

File tree

2 files changed

+50
-9
lines changed

2 files changed

+50
-9
lines changed

cloud.go

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"encoding/json"
7+
"errors"
78
"fmt"
89
"net/http"
910
"net/url"
@@ -59,6 +60,13 @@ var (
5960
},
6061
[]string{"type", "source"},
6162
)
63+
metricConnectionLastPingReceivedTimestamp = promauto.NewGaugeVec(
64+
prometheus.GaugeOpts{
65+
Name: "jetkvm_connection_last_ping_received_timestamp",
66+
Help: "The timestamp when the last ping request was received",
67+
},
68+
[]string{"type", "source"},
69+
)
6270
metricConnectionLastPingDuration = promauto.NewGaugeVec(
6371
prometheus.GaugeOpts{
6472
Name: "jetkvm_connection_last_ping_duration",
@@ -76,16 +84,23 @@ var (
7684
},
7785
[]string{"type", "source"},
7886
)
79-
metricConnectionTotalPingCount = promauto.NewCounterVec(
87+
metricConnectionTotalPingSentCount = promauto.NewCounterVec(
8088
prometheus.CounterOpts{
81-
Name: "jetkvm_connection_total_ping_count",
89+
Name: "jetkvm_connection_total_ping_sent",
8290
Help: "The total number of pings sent to the connection",
8391
},
8492
[]string{"type", "source"},
8593
)
94+
metricConnectionTotalPingReceivedCount = promauto.NewCounterVec(
95+
prometheus.CounterOpts{
96+
Name: "jetkvm_connection_total_ping_received",
97+
Help: "The total number of pings received from the connection",
98+
},
99+
[]string{"type", "source"},
100+
)
86101
metricConnectionSessionRequestCount = promauto.NewCounterVec(
87102
prometheus.CounterOpts{
88-
Name: "jetkvm_connection_session_total_request_count",
103+
Name: "jetkvm_connection_session_total_requests",
89104
Help: "The total number of session requests received",
90105
},
91106
[]string{"type", "source"},
@@ -131,6 +146,8 @@ func wsResetMetrics(established bool, sourceType string, source string) {
131146
metricConnectionLastPingTimestamp.WithLabelValues(sourceType, source).Set(-1)
132147
metricConnectionLastPingDuration.WithLabelValues(sourceType, source).Set(-1)
133148

149+
metricConnectionLastPingReceivedTimestamp.WithLabelValues(sourceType, source).Set(-1)
150+
134151
metricConnectionLastSessionRequestTimestamp.WithLabelValues(sourceType, source).Set(-1)
135152
metricConnectionLastSessionRequestDuration.WithLabelValues(sourceType, source).Set(-1)
136153

@@ -277,20 +294,29 @@ func runWebsocketClient() error {
277294
HTTPHeader: header,
278295
OnPingReceived: func(ctx context.Context, payload []byte) bool {
279296
websocketLogger.Infof("ping frame received: %v, source: %s, sourceType: cloud", payload, wsURL.Host)
297+
298+
metricConnectionTotalPingReceivedCount.WithLabelValues("cloud", wsURL.Host).Inc()
299+
metricConnectionLastPingReceivedTimestamp.WithLabelValues("cloud", wsURL.Host).SetToCurrentTime()
300+
280301
return true
281302
},
282303
})
304+
// if the context is canceled, we don't want to return an error
283305
if err != nil {
306+
if errors.Is(err, context.Canceled) {
307+
cloudLogger.Infof("websocket connection canceled")
308+
return nil
309+
}
284310
return err
285311
}
286312
defer c.CloseNow() //nolint:errcheck
287313
cloudLogger.Infof("websocket connected to %s", wsURL)
288314

289315
// set the metrics when we successfully connect to the cloud.
290-
wsResetMetrics(true, "cloud", "")
316+
wsResetMetrics(true, "cloud", wsURL.Host)
291317

292318
// we don't have a source for the cloud connection
293-
return handleWebRTCSignalWsMessages(c, true, "")
319+
return handleWebRTCSignalWsMessages(c, true, wsURL.Host)
294320
}
295321

296322
func authenticateSession(ctx context.Context, c *websocket.Conn, req WebRTCSessionRequest) error {
@@ -379,9 +405,6 @@ func handleSessionRequest(ctx context.Context, c *websocket.Conn, req WebRTCSess
379405

380406
func RunWebsocketClient() {
381407
for {
382-
// reset the metrics when we start the websocket client.
383-
wsResetMetrics(false, "cloud", "")
384-
385408
// If the cloud token is not set, we don't need to run the websocket client.
386409
if config.CloudToken == "" {
387410
time.Sleep(5 * time.Second)

web.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"embed"
77
"encoding/json"
8+
"errors"
89
"fmt"
910
"io/fs"
1011
"net/http"
@@ -190,6 +191,10 @@ func handleLocalWebRTCSignal(c *gin.Context) {
190191
InsecureSkipVerify: true, // Allow connections from any origin
191192
OnPingReceived: func(ctx context.Context, payload []byte) bool {
192193
websocketLogger.Infof("ping frame received: %v, source: %s, sourceType: local", payload, source)
194+
195+
metricConnectionTotalPingReceivedCount.WithLabelValues("local", source).Inc()
196+
metricConnectionLastPingReceivedTimestamp.WithLabelValues("local", source).SetToCurrentTime()
197+
193198
return true
194199
},
195200
}
@@ -251,6 +256,15 @@ func handleWebRTCSignalWsMessages(wsCon *websocket.Conn, isCloudConnection bool,
251256
for {
252257
time.Sleep(WebsocketPingInterval)
253258

259+
if ctxErr := runCtx.Err(); ctxErr != nil {
260+
if !errors.Is(ctxErr, context.Canceled) {
261+
logWarnf("websocket connection closed: %v", ctxErr)
262+
} else {
263+
logTracef("websocket connection closed as the context was canceled: %v")
264+
}
265+
return
266+
}
267+
254268
// set the timer for the ping duration
255269
timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
256270
metricConnectionLastPingDuration.WithLabelValues(sourceType, source).Set(v)
@@ -269,7 +283,7 @@ func handleWebRTCSignalWsMessages(wsCon *websocket.Conn, isCloudConnection bool,
269283
// dont use `defer` here because we want to observe the duration of the ping
270284
duration := timer.ObserveDuration()
271285

272-
metricConnectionTotalPingCount.WithLabelValues(sourceType, source).Inc()
286+
metricConnectionTotalPingSentCount.WithLabelValues(sourceType, source).Inc()
273287
metricConnectionLastPingTimestamp.WithLabelValues(sourceType, source).SetToCurrentTime()
274288

275289
logTracef("received pong frame, duration: %v", duration)
@@ -317,6 +331,10 @@ func handleWebRTCSignalWsMessages(wsCon *websocket.Conn, isCloudConnection bool,
317331
logWarnf("unable to write pong message: %v", err)
318332
return err
319333
}
334+
335+
metricConnectionTotalPingReceivedCount.WithLabelValues(sourceType, source).Inc()
336+
metricConnectionLastPingReceivedTimestamp.WithLabelValues(sourceType, source).SetToCurrentTime()
337+
320338
continue
321339
}
322340

0 commit comments

Comments
 (0)