From bd42be9ea3065e0be4191430c11bf598250ab638 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 6 Jun 2024 16:48:20 +0200 Subject: [PATCH 1/5] catchErrors middleware: Always record metrics Instead of relying on `Metrics`, use top-level metric registered using `unsafeRegister`. --- .../src/Network/Wai/Utilities/Server.hs | 57 +++++++++++-------- services/brig/src/Brig/Run.hs | 2 +- services/cannon/src/Cannon/Run.hs | 2 +- services/cargohold/src/CargoHold/Run.hs | 2 +- .../federator/src/Federator/Interpreter.hs | 2 +- services/galley/src/Galley/Run.hs | 2 +- services/gundeck/src/Gundeck/Run.hs | 2 +- services/proxy/src/Proxy/Run.hs | 2 +- services/spar/src/Spar/Run.hs | 2 +- 9 files changed, 40 insertions(+), 33 deletions(-) diff --git a/libs/wai-utilities/src/Network/Wai/Utilities/Server.hs b/libs/wai-utilities/src/Network/Wai/Utilities/Server.hs index 1425eec16a8..95657f59dd5 100644 --- a/libs/wai-utilities/src/Network/Wai/Utilities/Server.hs +++ b/libs/wai-utilities/src/Network/Wai/Utilities/Server.hs @@ -33,7 +33,6 @@ module Network.Wai.Utilities.Server requestIdMiddleware, catchErrors, catchErrorsWithRequestId, - OnErrorMetrics, heavyDebugLogging, rethrow5xx, lazyResponseBody, @@ -64,7 +63,6 @@ import Data.ByteString.Char8 qualified as C import Data.ByteString.Lazy qualified as LBS import Data.Domain (domainText) import Data.Metrics.GC (spawnGCMetricsCollector) -import Data.Metrics.Middleware import Data.Streaming.Zlib (ZlibException (..)) import Data.Text.Encoding qualified as Text import Data.Text.Encoding.Error (lenientDecode) @@ -87,7 +85,7 @@ import Network.Wai.Utilities.Error qualified as Wai import Network.Wai.Utilities.JSONResponse import Network.Wai.Utilities.Request (lookupRequestId) import Network.Wai.Utilities.Response -import Prometheus qualified as Prm +import Prometheus qualified as Prom import System.Logger qualified as Log import System.Logger.Class hiding (Error, Settings, format) import System.Posix.Signals (installHandler, sigINT, sigTERM) @@ -100,18 +98,14 @@ data Server = Server { serverHost :: String, serverPort :: Word16, serverLogger :: Logger, - serverMetrics :: Metrics, serverTimeout :: Maybe Int } -defaultServer :: String -> Word16 -> Logger -> Metrics -> Server -defaultServer h p l m = Server h p l m Nothing +defaultServer :: String -> Word16 -> Logger -> Server +defaultServer h p l = Server h p l Nothing newSettings :: MonadIO m => Server -> m Settings -newSettings (Server h p l m t) = do - -- (Atomically) initialise the standard metrics, to avoid races. - void $ gaugeGet (path "net.connections") m - void $ counterGet (path "net.errors") m +newSettings (Server h p l t) = do pure $ setHost (fromString h) . setPort (fromIntegral p) @@ -121,12 +115,22 @@ newSettings (Server h p l m t) = do . setTimeout (fromMaybe 300 t) $ defaultSettings where - connStart = gaugeIncr (path "net.connections") m - connEnd = gaugeDecr (path "net.connections") m + connStart = Prom.incGauge netConnections + connEnd = Prom.decGauge netConnections logStart = Log.info l . msg $ val "Listening on " +++ h +++ ':' +++ p +{-# NOINLINE netConnections #-} +netConnections :: Prom.Gauge +netConnections = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "net.connections", + Prom.metricHelp = "Number of active connections" + } + -- Run a WAI 'Application', initiating Warp's graceful shutdown -- on receiving either the INT or TERM signals. After closing -- the listen socket, Warp will be allowed to drain existing @@ -206,8 +210,8 @@ requestIdMiddleware logger reqIdHeaderName origApp req responder = let reqWithId = req {requestHeaders = (reqIdHeaderName, reqId) : req.requestHeaders} origApp reqWithId responder -catchErrors :: Logger -> HeaderName -> OnErrorMetrics -> Middleware -catchErrors l reqIdHeaderName m = catchErrorsWithRequestId (lookupRequestId reqIdHeaderName) l m +catchErrors :: Logger -> HeaderName -> Middleware +catchErrors l reqIdHeaderName = catchErrorsWithRequestId (lookupRequestId reqIdHeaderName) l -- | Create a middleware that catches exceptions and turns -- them into appropriate 'Error' responses, thereby logging @@ -219,9 +223,8 @@ catchErrors l reqIdHeaderName m = catchErrorsWithRequestId (lookupRequestId reqI catchErrorsWithRequestId :: (Request -> Maybe ByteString) -> Logger -> - OnErrorMetrics -> Middleware -catchErrorsWithRequestId getRequestId l m app req k = +catchErrorsWithRequestId getRequestId l app req k = rethrow5xx getRequestId l app req k `catch` errorResponse where mReqId = getRequestId req @@ -229,7 +232,7 @@ catchErrorsWithRequestId getRequestId l m app req k = errorResponse :: SomeException -> IO ResponseReceived errorResponse ex = do er <- runHandlers ex errorHandlers - onError l mReqId m req k er + onError l mReqId req k er {-# INLINEABLE catchErrors #-} @@ -374,31 +377,35 @@ lazyResponseBody rs = case responseToStream rs of -------------------------------------------------------------------------------- -- Utilities --- | 'onError' and 'catchErrors' support both the metrics-core ('Right') and the prometheus --- package introduced for spar ('Left'). -type OnErrorMetrics = [Either Prm.Counter Metrics] - -- | Send an 'Error' response. onError :: MonadIO m => Logger -> Maybe ByteString -> - OnErrorMetrics -> Request -> Continue IO -> Either Wai.Error JSONResponse -> m ResponseReceived -onError g mReqId m r k e = liftIO $ do +onError g mReqId r k e = liftIO $ do case e of Left we -> logError' g mReqId we Right jr -> logJSONResponse g mReqId jr let resp = either waiErrorToJSONResponse id e let code = statusCode (resp.status) - when (code >= 500) $ - either Prm.incCounter (counterIncr (path "net.errors")) `mapM_` m + when (code >= 500) $ Prom.incCounter netErrors flushRequestBody r k (jsonResponseToWai resp) +{-# NOINLINE netErrors #-} +netErrors :: Prom.Counter +netErrors = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "net.errors", + Prom.metricHelp = "Number of exceptions caught by catchErrors middleware" + } + defaultRequestIdHeaderName :: HeaderName defaultRequestIdHeaderName = "Request-Id" diff --git a/services/brig/src/Brig/Run.hs b/services/brig/src/Brig/Run.hs index 8c0c6facda0..8e85ea01689 100644 --- a/services/brig/src/Brig/Run.hs +++ b/services/brig/src/Brig/Run.hs @@ -124,7 +124,7 @@ mkApp o = do . Metrics.servantPrometheusMiddleware (Proxy @ServantCombinedAPI) . GZip.gunzip . GZip.gzip GZip.def - . catchErrors (e ^. applog) defaultRequestIdHeaderName [Right $ e ^. metrics] + . catchErrors (e ^. applog) defaultRequestIdHeaderName -- the servant API wraps the one defined using wai-routing servantApp :: Env -> Wai.Application diff --git a/services/cannon/src/Cannon/Run.hs b/services/cannon/src/Cannon/Run.hs index c3ec4f6f4d5..7ba53d95e9f 100644 --- a/services/cannon/src/Cannon/Run.hs +++ b/services/cannon/src/Cannon/Run.hs @@ -85,7 +85,7 @@ run o = do . requestIdMiddleware g defaultRequestIdHeaderName . servantPrometheusMiddleware (Proxy @CombinedAPI) . Gzip.gzip Gzip.def - . catchErrors g defaultRequestIdHeaderName [Right m] + . catchErrors g defaultRequestIdHeaderName app :: Application app = middleware (serve (Proxy @CombinedAPI) server) server :: Servant.Server CombinedAPI diff --git a/services/cargohold/src/CargoHold/Run.hs b/services/cargohold/src/CargoHold/Run.hs index 783199cccb3..a3eb898a75c 100644 --- a/services/cargohold/src/CargoHold/Run.hs +++ b/services/cargohold/src/CargoHold/Run.hs @@ -81,7 +81,7 @@ mkApp o = Codensity $ \k -> . requestIdMiddleware (e ^. appLogger) defaultRequestIdHeaderName . servantPrometheusMiddleware (Proxy @CombinedAPI) . GZip.gzip GZip.def - . catchErrors (e ^. appLogger) defaultRequestIdHeaderName [Right $ e ^. metrics] + . catchErrors (e ^. appLogger) defaultRequestIdHeaderName servantApp :: Env -> Application servantApp e0 r cont = do let rid = getRequestId defaultRequestIdHeaderName r diff --git a/services/federator/src/Federator/Interpreter.hs b/services/federator/src/Federator/Interpreter.hs index 089ef1ff07e..25923a7e824 100644 --- a/services/federator/src/Federator/Interpreter.hs +++ b/services/federator/src/Federator/Interpreter.hs @@ -114,7 +114,7 @@ serveServant env port server = do hoistServerWithContext (Proxy @api) (Proxy @'[]) (runFederator env rid) server Warp.run port . requestIdMiddleware env._applog federationRequestIdHeaderName - . Wai.catchErrors (view applog env) federationRequestIdHeaderName [] + . Wai.catchErrors (view applog env) federationRequestIdHeaderName . Metrics.servantPrometheusMiddleware (Proxy @api) $ app hoistApp where diff --git a/services/galley/src/Galley/Run.hs b/services/galley/src/Galley/Run.hs index 8110fc4454a..7557497fd23 100644 --- a/services/galley/src/Galley/Run.hs +++ b/services/galley/src/Galley/Run.hs @@ -99,7 +99,7 @@ mkApp opts = . servantPrometheusMiddleware (Proxy @CombinedAPI) . GZip.gunzip . GZip.gzip GZip.def - . catchErrors logger defaultRequestIdHeaderName [Right metrics] + . catchErrors logger defaultRequestIdHeaderName Codensity $ \k -> finally (k ()) $ do Log.info logger $ Log.msg @Text "Galley application finished." Log.flush logger diff --git a/services/gundeck/src/Gundeck/Run.hs b/services/gundeck/src/Gundeck/Run.hs index cff04418894..6702c9045d4 100644 --- a/services/gundeck/src/Gundeck/Run.hs +++ b/services/gundeck/src/Gundeck/Run.hs @@ -90,7 +90,7 @@ run o = do . waiPrometheusMiddleware sitemap . GZip.gunzip . GZip.gzip GZip.def - . catchErrors (e ^. applog) defaultRequestIdHeaderName [Right $ e ^. monitor] + . catchErrors (e ^. applog) defaultRequestIdHeaderName type CombinedAPI = GundeckAPI :<|> Servant.Raw diff --git a/services/proxy/src/Proxy/Run.hs b/services/proxy/src/Proxy/Run.hs index 2058052a059..95881999e95 100644 --- a/services/proxy/src/Proxy/Run.hs +++ b/services/proxy/src/Proxy/Run.hs @@ -46,5 +46,5 @@ run o = do . requestIdMiddleware (e ^. applog) defaultRequestIdHeaderName . waiPrometheusMiddleware (sitemap e) . GZip.gunzip - . catchErrors (e ^. applog) defaultRequestIdHeaderName [Right m] + . catchErrors (e ^. applog) defaultRequestIdHeaderName runSettingsWithShutdown s (middleware app) Nothing `finally` destroyEnv e diff --git a/services/spar/src/Spar/Run.hs b/services/spar/src/Spar/Run.hs index 8b55c3ce603..f07ca3ce871 100644 --- a/services/spar/src/Spar/Run.hs +++ b/services/spar/src/Spar/Run.hs @@ -112,7 +112,7 @@ mkApp sparCtxOpts = do . WU.heavyDebugLogging heavyLogOnly logLevel sparCtxLogger defaultRequestIdHeaderName . servantPrometheusMiddleware (Proxy @SparAPI) . GZip.gunzip - . WU.catchErrors sparCtxLogger defaultRequestIdHeaderName [] + . WU.catchErrors sparCtxLogger defaultRequestIdHeaderName -- Error 'Response's are usually not thrown as exceptions, but logged in -- 'renderSparErrorWithLogging' before the 'Application' can construct a 'Response' -- value, when there is still all the type information around. 'WU.catchErrors' is From cf73ffe43a6678c3558c147adf27d00b0eadb961 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Thu, 6 Jun 2024 16:52:06 +0200 Subject: [PATCH 2/5] Use `unsafeRegister` for metrics instead of bunch of IORef HashMaps --- libs/metrics-core/metrics-core.cabal | 7 +- libs/metrics-core/src/Data/Metrics.hs | 295 ------------------ libs/metrics-core/src/Data/Metrics/AWS.hs | 18 +- libs/metrics-wai/metrics-wai.cabal | 3 - .../src/Data/Metrics/Middleware.hs | 80 ----- libs/wai-utilities/wai-utilities.cabal | 1 - .../background-worker/background-worker.cabal | 1 - .../src/Wire/BackgroundWorker.hs | 2 +- .../src/Wire/BackgroundWorker/Env.hs | 3 - .../Wire/BackendNotificationPusherSpec.hs | 2 - .../background-worker/test/Test/Wire/Util.hs | 1 - services/brig/brig.cabal | 1 + services/brig/src/Brig/API/Public.hs | 6 +- services/brig/src/Brig/API/User.hs | 38 ++- services/brig/src/Brig/App.hs | 22 +- services/brig/src/Brig/Data/Client.hs | 51 ++- services/brig/src/Brig/Index/Eval.hs | 4 +- services/brig/src/Brig/Index/Migrations.hs | 2 - .../brig/src/Brig/Index/Migrations/Types.hs | 4 +- services/brig/src/Brig/Phone.hs | 38 ++- services/brig/src/Brig/Run.hs | 5 +- services/brig/src/Brig/User/Auth/Cookie.hs | 20 +- services/brig/src/Brig/User/Phone.hs | 19 +- services/brig/src/Brig/User/Search/Index.hs | 120 +++++-- services/cannon/cannon.cabal | 1 + services/cannon/src/Cannon/Run.hs | 24 +- services/cannon/src/Cannon/Types.hs | 18 +- services/cargohold/cargohold.cabal | 1 + services/cargohold/src/CargoHold/App.hs | 11 +- services/cargohold/src/CargoHold/Metrics.hs | 35 ++- services/cargohold/src/CargoHold/Run.hs | 10 +- services/federator/federator.cabal | 1 - services/federator/src/Federator/Env.hs | 4 +- services/federator/src/Federator/Run.hs | 2 - services/galley/galley.cabal | 1 + services/galley/src/Galley/App.hs | 8 +- services/galley/src/Galley/Env.hs | 2 - services/galley/src/Galley/Monad.hs | 4 +- services/galley/src/Galley/Run.hs | 28 +- services/gundeck/gundeck.cabal | 2 +- services/gundeck/src/Gundeck/Env.hs | 8 +- services/gundeck/src/Gundeck/Monad.hs | 6 +- services/gundeck/src/Gundeck/Push.hs | 3 +- services/gundeck/src/Gundeck/Push/Native.hs | 74 ++++- .../gundeck/src/Gundeck/Push/Websocket.hs | 23 +- services/gundeck/src/Gundeck/Run.hs | 17 +- .../src/Gundeck/ThreadBudget/Internal.hs | 91 ++++-- services/gundeck/test/integration/Util.hs | 4 +- services/gundeck/test/unit/ThreadBudget.hs | 7 +- services/proxy/src/Proxy/Env.hs | 9 +- services/proxy/src/Proxy/Run.hs | 6 +- tools/stern/src/Stern/API.hs | 2 +- tools/stern/src/Stern/App.hs | 5 +- tools/stern/stern.cabal | 1 - 54 files changed, 522 insertions(+), 629 deletions(-) delete mode 100644 libs/metrics-core/src/Data/Metrics.hs delete mode 100644 libs/metrics-wai/src/Data/Metrics/Middleware.hs diff --git a/libs/metrics-core/metrics-core.cabal b/libs/metrics-core/metrics-core.cabal index 278c23d2a84..67ef9011baa 100644 --- a/libs/metrics-core/metrics-core.cabal +++ b/libs/metrics-core/metrics-core.cabal @@ -12,7 +12,6 @@ build-type: Simple library exposed-modules: - Data.Metrics Data.Metrics.AWS Data.Metrics.GC @@ -66,14 +65,10 @@ library -Wredundant-constraints -Wunused-packages build-depends: - base >=4.9 - , containers - , hashable >=1.2 + base >=4.9 , immortal , imports , prometheus-client - , text >=0.11 , time - , unordered-containers >=0.2 default-language: GHC2021 diff --git a/libs/metrics-core/src/Data/Metrics.hs b/libs/metrics-core/src/Data/Metrics.hs deleted file mode 100644 index 1a7c1726183..00000000000 --- a/libs/metrics-core/src/Data/Metrics.hs +++ /dev/null @@ -1,295 +0,0 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE OverloadedStrings #-} - --- This file is part of the Wire Server implementation. --- --- Copyright (C) 2022 Wire Swiss GmbH --- --- This program is free software: you can redistribute it and/or modify it under --- the terms of the GNU Affero General Public License as published by the Free --- Software Foundation, either version 3 of the License, or (at your option) any --- later version. --- --- This program is distributed in the hope that it will be useful, but WITHOUT --- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS --- FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more --- details. --- --- You should have received a copy of the GNU Affero General Public License along --- with this program. If not, see . - -module Data.Metrics - ( -- * Types - Path, - Metrics, - Histogram, - Counter, - Gauge, - - -- * Counters - counterGet, - counterAdd, - counterIncr, - counterValue, - - -- * Gauges - gaugeGet, - gaugeAdd, - gaugeSub, - gaugeIncr, - gaugeDecr, - gaugeSet, - gaugeValue, - - -- * Histograms - - -- ** Types - HistogramInfo, - Buckets, - Bucket, - - -- ** Describing Histograms - linearHistogram, - customHistogram, - - -- ** Manipulating Histograms - histoGet, - histoSubmit, - histoValue, - histoTimeAction, - - -- * Helper functions - path, - metrics, - ) -where - -import Data.HashMap.Strict qualified as HM -import Data.Hashable -import Data.Map.Strict qualified as M -import Data.Text qualified as T -import Imports hiding (lookup, union) -import Prometheus qualified as P - --- | Internal Counter type -newtype Counter = Counter P.Counter - --- | Internal Gauge type -newtype Gauge = Gauge P.Gauge - --- | Internal Histogram type -newtype Histogram = Histogram P.Histogram - --- | Represents a descriptive metric path or name. --- --- NOTE: Until all metrics are fully migrated to Prometheus this should be a valid --- name according to collectd; e.g. @net.resources./teams/invitations/info@ --- All names are converted into valid prometheus names when needed via 'toInfo' -newtype Path = Path - { _path :: Text - } - deriving (Eq, Show, Hashable, Semigroup, Monoid) - --- | Create a path -path :: Text -> Path -path = Path - --- | Opaque storage of metrics -data Metrics = Metrics - { counters :: IORef (HashMap Path Counter), - gauges :: IORef (HashMap Path Gauge), - histograms :: IORef (HashMap Path Histogram) - } - deriving (Generic) - --- Initialize an empty set of metrics -metrics :: MonadIO m => m Metrics -metrics = - liftIO $ - Metrics - <$> newIORef HM.empty - <*> newIORef HM.empty - <*> newIORef HM.empty - --- | Converts a CollectD style 'path' to a Metric name usable by prometheus --- This is to provide back compatibility with the previous collect-d metric names --- which often had paths and dot-separated names. --- --- See the spec for valid prometheus names: --- https://prometheus.io/docs/concepts/data_model/ --- --- E.g. we sanitize a metric name like "net.resources._conversations_:cnv-members_:usr.DELETE.time.960" --- into: "net_resources_conversations_:cnv_members_:usr_delete_time_960" -toInfo :: Path -> P.Info -toInfo (Path p) = - P.Info - ( p - & T.map sanitize - & ensureValidStartingChar - & collapseMultipleUnderscores - & T.toLower - ) - "description not provided" - where - ensureValidStartingChar :: Text -> Text - ensureValidStartingChar = T.dropWhile (not . validStartingChar) - validStartingChar :: Char -> Bool - validStartingChar c = isAlpha c || c `elem` ['_', ':'] - collapseMultipleUnderscores :: Text -> Text - collapseMultipleUnderscores = T.intercalate "_" . filter (not . T.null) . T.splitOn "_" - sanitize :: Char -> Char - sanitize ':' = ':' - sanitize c - | isAlphaNum c = c - | otherwise = '_' - --- | Checks whether a given key exists in a mutable hashmap (i.e. one inside an IORef) --- If it exists it is returned, if it does not then one is initialized using the provided --- initializer, then stored, then returned. -getOrCreate :: (MonadIO m, Hashable k) => IORef (HashMap k v) -> k -> IO v -> m v -getOrCreate mapRef key initializer = liftIO $ do - hMap <- readIORef mapRef - maybe initialize pure (HM.lookup key hMap) - where - initialize = do - val <- initializer - atomicModifyIORef' mapRef $ \m -> (HM.insert key val m, val) - ------------------------------------------------------------------------------ --- Counter specifics - --- | Create a counter for a 'Path' -newCounter :: Path -> IO Counter -newCounter p = Counter <$> P.register (P.counter $ toInfo p) - --- | Access the counter for a given 'Path' -counterGet :: MonadIO m => Path -> Metrics -> m Counter -counterGet p m = getOrCreate (counters m) p (newCounter p) - --- | Add the given amount to the counter at 'Path' -counterAdd :: MonadIO m => Double -> Path -> Metrics -> m () -counterAdd x p m = liftIO $ do - Counter c <- counterGet p m - void $ P.addCounter c x - --- | Add 1 to the counter at 'Path' -counterIncr :: MonadIO m => Path -> Metrics -> m () -counterIncr = counterAdd 1 - --- | Get the current value of the Counter -counterValue :: MonadIO m => Counter -> m Double -counterValue (Counter c) = P.getCounter c - ------------------------------------------------------------------------------ --- Gauge specifics - --- | Create a gauge for a 'Path' -newGauge :: Path -> IO Gauge -newGauge p = Gauge <$> P.register (P.gauge $ toInfo p) - --- | Access the gauge for a given 'Path' -gaugeGet :: MonadIO m => Path -> Metrics -> m Gauge -gaugeGet p m = getOrCreate (gauges m) p (newGauge p) - --- | Set the 'Gauge' at 'Path' to the given value -gaugeSet :: MonadIO m => Double -> Path -> Metrics -> m () -gaugeSet x p m = liftIO $ do - Gauge g <- gaugeGet p m - P.setGauge g x - --- | Add the given amount to the gauge at 'Path' -gaugeAdd :: MonadIO m => Double -> Path -> Metrics -> m () -gaugeAdd x p m = liftIO $ do - Gauge g <- gaugeGet p m - P.addGauge g x - --- | Add 1 to the gauge at 'Path' -gaugeIncr :: MonadIO m => Path -> Metrics -> m () -gaugeIncr = gaugeAdd 1 - --- | Subtract 1 from the gauge at 'Path' -gaugeDecr :: MonadIO m => Path -> Metrics -> m () -gaugeDecr = gaugeAdd (-1) - --- | Subtract the given amount from the gauge at 'Path' -gaugeSub :: MonadIO m => Double -> Path -> Metrics -> m () -gaugeSub x = gaugeAdd (-x) - --- | Get the current value of the Gauge -gaugeValue :: MonadIO m => Gauge -> m Double -gaugeValue (Gauge g) = liftIO $ P.getGauge g - ------------------------------------------------------------------------------ --- Histogram specifics - --- | A marker of a bucketing point -type Bucket = Double - --- | Description of discrete buckets which histogram samples will be allocated into -type Buckets = [Bucket] - --- | Describes a histogram metric -data HistogramInfo = HistogramInfo - { hiPath :: Path, - hiBuckets :: Buckets - } - deriving (Eq, Show) - -type RangeStart = Double - -type RangeEnd = Double - -type BucketWidth = Double - --- | Creates a 'HistogramInfo' which has evenly sized buckets of the given 'BucketWidth' --- between 'RangeStart' and 'RangeEnd' -linearHistogram :: Path -> RangeStart -> RangeEnd -> BucketWidth -> HistogramInfo -linearHistogram pth start end width = - HistogramInfo - { hiPath = pth, - hiBuckets = buckets - } - where - count :: Int - count = ceiling $ (end - start) / width - buckets :: Buckets - buckets = P.linearBuckets start width count - --- | Construct a histogram using a given list of buckets. --- It's recommended that you use 'linearHistogram' instead when possible. -customHistogram :: Path -> Buckets -> HistogramInfo -customHistogram pth buckets = HistogramInfo {hiPath = pth, hiBuckets = buckets} - --- | Create a histo for a 'HistogramInfo' -newHisto :: HistogramInfo -> IO Histogram -newHisto HistogramInfo {hiPath, hiBuckets} = - Histogram <$> P.register (P.histogram (toInfo hiPath) hiBuckets) - --- | Access the histogram for a given 'HistogramInfo' -histoGet :: - MonadIO m => - HistogramInfo -> - Metrics -> - m Histogram -histoGet hi@HistogramInfo {hiPath} m = getOrCreate (histograms m) hiPath (newHisto hi) - --- | Get the current distribution of a Histogram -histoValue :: MonadIO m => Histogram -> m (M.Map Bucket Int) -histoValue (Histogram histo) = liftIO $ P.getHistogram histo - --- | Report an individual value to be bucketed in the histogram -histoSubmit :: MonadIO m => Double -> HistogramInfo -> Metrics -> m () -histoSubmit val hi m = liftIO $ do - Histogram h <- histoGet hi m - P.observe h val - --- | Execute and time the provided monadic action and submit it as an entry --- to the provided Histogram metric. --- --- NOTE: If the action throws an exception it will NOT be reported. --- This is particularly relevant for web handlers which signal their response --- with an exception. -histoTimeAction :: (P.MonadMonitor m, MonadIO m) => HistogramInfo -> Metrics -> m a -> m a -histoTimeAction hi m act = do - Histogram h <- histoGet hi m - P.observeDuration h act diff --git a/libs/metrics-core/src/Data/Metrics/AWS.hs b/libs/metrics-core/src/Data/Metrics/AWS.hs index 7ff710f229c..437ad2f0628 100644 --- a/libs/metrics-core/src/Data/Metrics/AWS.hs +++ b/libs/metrics-core/src/Data/Metrics/AWS.hs @@ -16,14 +16,24 @@ module Data.Metrics.AWS (gaugeTokenRemaing) where -import Data.Metrics (Metrics, gaugeSet, path) import Data.Time import Imports +import Prometheus qualified as Prom -gaugeTokenRemaing :: Metrics -> Maybe NominalDiffTime -> IO () -gaugeTokenRemaing m mbRemaining = do +gaugeTokenRemaing :: Maybe NominalDiffTime -> IO () +gaugeTokenRemaing mbRemaining = do let t = toSeconds (fromMaybe 0 mbRemaining) - gaugeSet t (path "aws_auth.token_secs_remaining") m + Prom.setGauge awsAuthTokenSecsRemaining t where toSeconds :: NominalDiffTime -> Double toSeconds = fromRational . toRational + +{-# NOINLINE awsAuthTokenSecsRemaining #-} +awsAuthTokenSecsRemaining :: Prom.Gauge +awsAuthTokenSecsRemaining = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "aws_auth.token_secs_remaining", + Prom.metricHelp = "Number of seconds left before AWS Auth expires" + } diff --git a/libs/metrics-wai/metrics-wai.cabal b/libs/metrics-wai/metrics-wai.cabal index 3d9725348fe..ed848c893cb 100644 --- a/libs/metrics-wai/metrics-wai.cabal +++ b/libs/metrics-wai/metrics-wai.cabal @@ -12,7 +12,6 @@ build-type: Simple library exposed-modules: - Data.Metrics.Middleware Data.Metrics.Middleware.Prometheus Data.Metrics.Servant Data.Metrics.Test @@ -73,9 +72,7 @@ library base >=4 && <5 , bytestring >=0.10 , containers - , http-types >=0.8 , imports - , metrics-core >=0.3 , servant , servant-multipart , text >=0.11 diff --git a/libs/metrics-wai/src/Data/Metrics/Middleware.hs b/libs/metrics-wai/src/Data/Metrics/Middleware.hs deleted file mode 100644 index a65c902d6ff..00000000000 --- a/libs/metrics-wai/src/Data/Metrics/Middleware.hs +++ /dev/null @@ -1,80 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - --- This file is part of the Wire Server implementation. --- --- Copyright (C) 2022 Wire Swiss GmbH --- --- This program is free software: you can redistribute it and/or modify it under --- the terms of the GNU Affero General Public License as published by the Free --- Software Foundation, either version 3 of the License, or (at your option) any --- later version. --- --- This program is distributed in the hope that it will be useful, but WITHOUT --- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS --- FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more --- details. --- --- You should have received a copy of the GNU Affero General Public License along --- with this program. If not, see . - --- | FUTUREWORK: use package wai-middleware-prometheus instead and deprecate collectd? -module Data.Metrics.Middleware - ( PathTemplate, - Paths, - withPathTemplate, - requestCounter, - module Data.Metrics, - ) -where - -import Data.Metrics -import Data.Metrics.Types -import Data.Text qualified as T -import Data.Text.Encoding qualified as T -import Imports -import Network.HTTP.Types -import Network.Wai -import Network.Wai.Internal (Response (ResponseRaw)) -import Network.Wai.Route.Tree qualified as Tree - -withPathTemplate :: Paths -> (PathTemplate -> Middleware) -> Middleware -withPathTemplate t f app r k = f (fromMaybe def tmp) app r k - where - def = PathTemplate "N/A" - tmp = - PathTemplate - . T.decodeUtf8 - <$> treeLookup t (Tree.segments $ rawPathInfo r) - --- Count Requests and their status code. --- --- [Note [Raw Response]]: --- --- We ignore the status code of raw responses which are returned after --- websocket communication ends because there is no meaningful status code --- to ask for. WAI uses the fallback response status code (i.e. 500) which --- is only used in servers which do not support raw responses (i.e. not --- Warp). -requestCounter :: Metrics -> PathTemplate -> Middleware -requestCounter m (PathTemplate t) f rq k = f rq onResponse - where - onResponse rs@(ResponseRaw _ _) = do - -- See Note [Raw Response] - counterIncr (path "net.requests") m - k rs - onResponse rs = do - counterIncr (path "net.requests") m - counterIncr (mkPath [t, methodName rq, "status", code rs]) m - k rs - -mkPath :: [Text] -> Path -mkPath = path . mconcat . intersperse "." . ("net.resources" :) -{-# INLINE mkPath #-} - -code :: Response -> Text -code = T.pack . show . statusCode . responseStatus -{-# INLINE code #-} - -methodName :: Request -> Text -methodName = T.decodeUtf8 . requestMethod -{-# INLINE methodName #-} diff --git a/libs/wai-utilities/wai-utilities.cabal b/libs/wai-utilities/wai-utilities.cabal index 4741ef9cd04..f40d486e0f2 100644 --- a/libs/wai-utilities/wai-utilities.cabal +++ b/libs/wai-utilities/wai-utilities.cabal @@ -88,7 +88,6 @@ library , imports , kan-extensions , metrics-core >=0.1 - , metrics-wai >=0.5.7 , openapi3 , pipes >=4.1 , prometheus-client diff --git a/services/background-worker/background-worker.cabal b/services/background-worker/background-worker.cabal index 31657b00ca5..4807e863625 100644 --- a/services/background-worker/background-worker.cabal +++ b/services/background-worker/background-worker.cabal @@ -37,7 +37,6 @@ library , http-client , http2-manager , imports - , metrics-core , metrics-wai , monad-control , prometheus-client diff --git a/services/background-worker/src/Wire/BackgroundWorker.hs b/services/background-worker/src/Wire/BackgroundWorker.hs index b5e745d6558..3a9bc8e298a 100644 --- a/services/background-worker/src/Wire/BackgroundWorker.hs +++ b/services/background-worker/src/Wire/BackgroundWorker.hs @@ -48,7 +48,7 @@ run opts = do -- Close the channel. `extended` will then close the connection, flushing messages to the server. Log.info l $ Log.msg $ Log.val "Closing RabbitMQ channel" Q.closeChannel chan - let server = defaultServer (T.unpack $ opts.backgroundWorker._host) opts.backgroundWorker._port env.logger env.metrics + let server = defaultServer (T.unpack $ opts.backgroundWorker._host) opts.backgroundWorker._port env.logger settings <- newSettings server -- Additional cleanup when shutting down via signals. runSettingsWithCleanup cleanup settings (servantApp env) Nothing diff --git a/services/background-worker/src/Wire/BackgroundWorker/Env.hs b/services/background-worker/src/Wire/BackgroundWorker/Env.hs index 0d3080595f6..9d1265fd131 100644 --- a/services/background-worker/src/Wire/BackgroundWorker/Env.hs +++ b/services/background-worker/src/Wire/BackgroundWorker/Env.hs @@ -7,7 +7,6 @@ import Control.Monad.Base import Control.Monad.Catch import Control.Monad.Trans.Control import Data.Map.Strict qualified as Map -import Data.Metrics qualified as Metrics import HTTP2.Client.Manager import Imports import Network.AMQP.Extended @@ -35,7 +34,6 @@ data Env = Env rabbitmqAdminClient :: RabbitMqAdmin.AdminAPI (Servant.AsClientT IO), rabbitmqVHost :: Text, logger :: Logger, - metrics :: Metrics.Metrics, federatorInternal :: Endpoint, httpManager :: Manager, defederationTimeout :: ResponseTimeout, @@ -75,7 +73,6 @@ mkEnv opts = do Map.fromList [ (BackendNotificationPusher, False) ] - metrics <- Metrics.metrics backendNotificationMetrics <- mkBackendNotificationMetrics let backendNotificationsConfig = opts.backendNotificationPusher pure Env {..} diff --git a/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs b/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs index 472f02d1f2e..6b53ed6e9e3 100644 --- a/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs +++ b/services/background-worker/test/Test/Wire/BackendNotificationPusherSpec.hs @@ -270,7 +270,6 @@ spec = do let federatorInternal = Endpoint "localhost" 8097 http2Manager = undefined statuses = undefined - metrics = undefined rabbitmqAdminClient = mockRabbitMqAdminClient mockAdmin rabbitmqVHost = "test-vhost" defederationTimeout = responseTimeoutNone @@ -288,7 +287,6 @@ spec = do let federatorInternal = Endpoint "localhost" 8097 http2Manager = undefined statuses = undefined - metrics = undefined rabbitmqAdminClient = mockRabbitMqAdminClient mockAdmin rabbitmqVHost = "test-vhost" defederationTimeout = responseTimeoutNone diff --git a/services/background-worker/test/Test/Wire/Util.hs b/services/background-worker/test/Test/Wire/Util.hs index ba698cccc2b..7c6fbf48aab 100644 --- a/services/background-worker/test/Test/Wire/Util.hs +++ b/services/background-worker/test/Test/Wire/Util.hs @@ -21,7 +21,6 @@ testEnv = do let federatorInternal = Endpoint "localhost" 0 rabbitmqAdminClient = undefined rabbitmqVHost = undefined - metrics = undefined defederationTimeout = responseTimeoutNone backendNotificationsConfig = BackendNotificationsConfig 1000 500000 1000 pure Env {..} diff --git a/services/brig/brig.cabal b/services/brig/brig.cabal index 29d5d17791a..8d4b4f71b8d 100644 --- a/services/brig/brig.cabal +++ b/services/brig/brig.cabal @@ -313,6 +313,7 @@ library , polysemy-plugin , polysemy-time , polysemy-wire-zoo + , prometheus-client , proto-lens >=0.1 , random-shuffle >=0.0.3 , raw-strings-qq diff --git a/services/brig/src/Brig/API/Public.hs b/services/brig/src/Brig/API/Public.hs index 3549efff504..654ba033d94 100644 --- a/services/brig/src/Brig/API/Public.hs +++ b/services/brig/src/Brig/API/Public.hs @@ -775,7 +775,7 @@ createUser (Public.NewUserPublic new) = lift . runExceptT $ do for_ (liftM2 (,) userEmail epair) $ \(e, p) -> sendActivationEmail e userDisplayName p (Just userLocale) newUserTeam for_ (liftM2 (,) userPhone ppair) $ \(p, c) -> - wrapClient $ sendActivationSms p c (Just userLocale) + wrapHttp $ sendActivationSms p c (Just userLocale) for_ (liftM3 (,,) userEmail (createdUserTeam result) newUserTeam) $ \(e, ct, ut) -> sendWelcomeEmail e ct ut (Just userLocale) cok <- @@ -945,7 +945,7 @@ changePhone u _ (Public.puPhone -> phone) = lift . exceptTToMaybe $ do (adata, pn) <- API.changePhone u phone loc <- lift $ wrapClient $ API.lookupLocale u let apair = (activationKey adata, activationCode adata) - lift . wrapClient $ sendActivationSms pn apair loc + lift . wrapHttp $ sendActivationSms pn apair loc removePhone :: ( Member (Embed HttpClientIO) r, @@ -1068,7 +1068,7 @@ beginPasswordReset (Public.NewPasswordReset target) = do loc <- lift $ wrapClient $ API.lookupLocale u lift $ case target of Left email -> sendPasswordResetMail email pair loc - Right phone -> wrapClient $ sendPasswordResetSms phone pair loc + Right phone -> wrapHttp $ sendPasswordResetSms phone pair loc completePasswordReset :: ( Member CodeStore r, diff --git a/services/brig/src/Brig/API/User.hs b/services/brig/src/Brig/API/User.hs index c80dd866ff6..a715cfb2525 100644 --- a/services/brig/src/Brig/API/User.hs +++ b/services/brig/src/Brig/API/User.hs @@ -143,7 +143,6 @@ import Data.Json.Util import Data.LegalHold (UserLegalHoldStatus (..), defUserLegalHoldStatus) import Data.List.Extra import Data.List1 as List1 (List1, singleton) -import Data.Metrics qualified as Metrics import Data.Misc import Data.Qualified import Data.Range @@ -155,6 +154,7 @@ import Polysemy import Polysemy.Input (Input) import Polysemy.TinyLog (TinyLog) import Polysemy.TinyLog qualified as Log +import Prometheus qualified as Prom import System.Logger.Message import UnliftIO.Async (mapConcurrently_) import Wire.API.Connection @@ -1143,11 +1143,10 @@ sendActivationCode emailOrPhone loc call = case emailOrPhone of throwE (ActivationBlacklistedUserKey pk) c <- lift . wrapClient $ fmap snd <$> Data.lookupActivationCode pk p <- wrapClientE $ mkPair pk c Nothing - void . forPhoneKey pk $ \ph -> - lift $ - if call - then wrapClient $ sendActivationCall ph p loc - else wrapClient $ sendActivationSms ph p loc + void . lift . wrapHttp $ forPhoneKey pk $ \ph -> + if call + then sendActivationCall ph p loc + else sendActivationSms ph p loc where notFound = throwM . UserDisplayNameNotFound mkPair k c u = do @@ -1377,7 +1376,7 @@ deleteSelfUser uid pwd = do let n = userDisplayName (accountUser a) either (\e -> lift $ sendDeletionEmail n e k v l) - (\p -> lift $ wrapClient $ sendDeletionSms p k v l) + (\p -> lift $ wrapHttp $ sendDeletionSms p k v l) target `onException` wrapClientE (Code.delete k Code.AccountDeletion) pure $! Just $! Code.codeTTL c @@ -1539,9 +1538,28 @@ deleteUsersNoVerify :: AppT r () deleteUsersNoVerify uids = do liftSem $ for_ uids deleteUserNoVerify - m <- view metrics - Metrics.counterAdd (fromIntegral . length $ uids) (Metrics.path "user.enqueue_multi_delete_total") m - Metrics.counterIncr (Metrics.path "user.enqueue_multi_delete_calls_total") m + void $ Prom.addCounter enqueueMultiDeleteCounter (fromIntegral $ length uids) + Prom.incCounter enqueueMultiDeleteCallsCounter + +{-# NOINLINE enqueueMultiDeleteCounter #-} +enqueueMultiDeleteCounter :: Prom.Counter +enqueueMultiDeleteCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.enqueue_multi_delete_total", + Prom.metricHelp = "Number of users enqueued to be deleted" + } + +{-# NOINLINE enqueueMultiDeleteCallsCounter #-} +enqueueMultiDeleteCallsCounter :: Prom.Counter +enqueueMultiDeleteCallsCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.enqueue_multi_delete_calls_total", + Prom.metricHelp = "Number of users enqueued to be deleted" + } -- | Similar to lookupProfiles except it returns all results and all errors -- allowing for partial success. diff --git a/services/brig/src/Brig/App.hs b/services/brig/src/Brig/App.hs index 5c0547b26c1..9aeafc73ef6 100644 --- a/services/brig/src/Brig/App.hs +++ b/services/brig/src/Brig/App.hs @@ -54,7 +54,6 @@ module Brig.App zauthEnv, digestSHA256, digestMD5, - metrics, applog, turnEnv, sftEnv, @@ -121,8 +120,6 @@ import Control.Monad.Trans.Resource import Data.ByteString.Conversion import Data.Credentials (Credentials (..)) import Data.Domain -import Data.Metrics (Metrics) -import Data.Metrics.Middleware qualified as Metrics import Data.Misc import Data.Qualified import Data.Text qualified as Text @@ -143,6 +140,7 @@ import OpenSSL.Session qualified as SSL import Polysemy import Polysemy.Final import Polysemy.Input (Input, input) +import Prometheus import Ropes.Nexmo qualified as Nexmo import Ropes.Twilio qualified as Twilio import Ssl.Util @@ -172,7 +170,6 @@ data Env = Env _smtpEnv :: Maybe SMTP.SMTP, _emailSender :: Email, _awsEnv :: AWS.Env, - _metrics :: Metrics, _applog :: Logger, _internalEvents :: QueueEnv, _requestId :: RequestId, @@ -216,7 +213,6 @@ newEnv o = do Just md5 <- getDigestByName "MD5" Just sha256 <- getDigestByName "SHA256" Just sha512 <- getDigestByName "SHA512" - mtr <- Metrics.metrics lgr <- Log.mkLogger (Opt.logLevel o) (Opt.logNetStrings o) (Opt.logFormat o) cas <- initCassandra o lgr mgr <- initHttpManager @@ -261,7 +257,7 @@ newEnv o = do kpLock <- newMVar () rabbitChan <- traverse (Q.mkRabbitMqChannelMVar lgr) o.rabbitmq let allDisabledVersions = foldMap expandVersionExp (Opt.setDisabledAPIVersions sett) - idxEnv <- mkIndexEnv o.elasticsearch lgr mtr (Opt.galley o) mgr + idxEnv <- mkIndexEnv o.elasticsearch lgr (Opt.galley o) mgr pure $! Env { _cargohold = mkEndpoint $ Opt.cargohold o, @@ -274,7 +270,6 @@ newEnv o = do _smtpEnv = emailSMTP, _emailSender = Opt.emailSender . Opt.general . Opt.emailSMS $ o, _awsEnv = aws, -- used by `journalEvent` directly - _metrics = mtr, _applog = lgr, _internalEvents = (eventsQueue :: QueueEnv), _requestId = RequestId "N/A", @@ -315,8 +310,8 @@ newEnv o = do pure (Nothing, Just smtp) mkEndpoint service = RPC.host (encodeUtf8 (service ^. host)) . RPC.port (service ^. port) $ RPC.empty -mkIndexEnv :: ElasticSearchOpts -> Logger -> Metrics -> Endpoint -> Manager -> IO IndexEnv -mkIndexEnv esOpts logger metricsStorage galleyEp rpcHttpManager = do +mkIndexEnv :: ElasticSearchOpts -> Logger -> Endpoint -> Manager -> IO IndexEnv +mkIndexEnv esOpts logger galleyEp rpcHttpManager = do mEsCreds :: Maybe Credentials <- for esOpts.credentials initCredentials mEsAddCreds :: Maybe Credentials <- for esOpts.additionalCredentials initCredentials @@ -331,8 +326,7 @@ mkIndexEnv esOpts logger metricsStorage galleyEp rpcHttpManager = do mkBhEnv esOpts.additionalInsecureSkipVerifyTls esOpts.additionalCaCert mEsAddCreds pure $ IndexEnv - { idxMetrics = metricsStorage, - idxLogger = esLogger, + { idxLogger = esLogger, idxElastic = bhEnv, idxRequest = Nothing, idxName = esOpts.index, @@ -488,6 +482,9 @@ instance Monad (AppT r) where instance MonadIO (AppT r) where liftIO io = AppT $ lift $ embedFinal io +instance MonadMonitor (AppT r) where + doIO = liftIO + instance MonadThrow (AppT r) where throwM = liftIO . throwM @@ -602,6 +599,9 @@ instance Cas.MonadClient HttpClientIO where liftIO $ runClient (view casClient env) cl localState f = local (casClient %~ f) +instance MonadMonitor HttpClientIO where + doIO = liftIO + wrapHttpClient :: HttpClientIO a -> AppT r a diff --git a/services/brig/src/Brig/Data/Client.hs b/services/brig/src/Brig/Data/Client.hs index 69e9ac0b829..b214ef7ab06 100644 --- a/services/brig/src/Brig/Data/Client.hs +++ b/services/brig/src/Brig/Data/Client.hs @@ -75,12 +75,12 @@ import Data.HashMap.Strict qualified as HashMap import Data.Id import Data.Json.Util (UTCTimeMillis, toUTCTimeMillis) import Data.Map qualified as Map -import Data.Metrics qualified as Metrics import Data.Set qualified as Set import Data.Text qualified as Text import Data.Time.Clock import Data.UUID qualified as UUID import Imports +import Prometheus qualified as Prom import System.CryptoBox (Result (Success)) import System.CryptoBox qualified as CryptoBox import System.Logger.Class (field, msg, val) @@ -289,7 +289,8 @@ claimPrekey :: ( Log.MonadLogger m, MonadMask m, MonadClient m, - MonadReader Brig.App.Env m + MonadReader Brig.App.Env m, + Prom.MonadMonitor m ) => UserId -> ClientId -> @@ -498,7 +499,8 @@ withOptLock :: forall a m. ( MonadIO m, MonadReader Brig.App.Env m, - Log.MonadLogger m + Log.MonadLogger m, + Prom.MonadMonitor m ) => UserId -> ClientId -> @@ -545,15 +547,14 @@ withOptLock u c ma = go (10 :: Int) toAttributeValue :: Word32 -> AWS.AttributeValue toAttributeValue w = AWS.N $ AWS.toText (fromIntegral w :: Int) reportAttemptFailure :: m () - reportAttemptFailure = - Metrics.counterIncr (Metrics.path "client.opt_lock.optimistic_lock_grab_attempt_failed") =<< view metrics + reportAttemptFailure = Prom.incCounter optimisticLockGrabAttemptFailedCounter reportFailureAndLogError :: m () reportFailureAndLogError = do Log.err $ Log.field "user" (toByteString' u) . Log.field "client" (toByteString' c) . msg (val "PreKeys: Optimistic lock failed") - Metrics.counterIncr (Metrics.path "client.opt_lock.optimistic_lock_failed") =<< view metrics + Prom.incCounter optimisticLockFailedCounter execDyn :: forall r x. (AWS.AWSRequest r, Typeable r, Typeable (AWS.AWSResponse r)) => @@ -563,27 +564,55 @@ withOptLock u c ma = go (10 :: Int) execDyn cnv mkCmd = do cmd <- mkCmd <$> view (awsEnv . prekeyTable) e <- view (awsEnv . amazonkaEnv) - m <- view metrics - liftIO $ execDyn' e m cnv cmd + liftIO $ execDyn' e cnv cmd where execDyn' :: forall y p. (AWS.AWSRequest p, Typeable (AWS.AWSResponse p), Typeable p) => AWS.Env -> - Metrics.Metrics -> (AWS.AWSResponse p -> Maybe y) -> p -> IO (Maybe y) - execDyn' e m conv cmd = recovering policy handlers (const run) + execDyn' e conv cmd = recovering policy handlers (const run) where run = execCatch e cmd >>= either handleErr (pure . conv) handlers = httpHandlers ++ [const $ EL.handler_ AWS._ConditionalCheckFailedException (pure True)] policy = limitRetries 3 <> exponentialBackoff 100000 handleErr (AWS.ServiceError se) | se ^. AWS.serviceError_code == AWS.ErrorCode "ProvisionedThroughputExceeded" = do - Metrics.counterIncr (Metrics.path "client.opt_lock.provisioned_throughput_exceeded") m + Prom.incCounter dynProvisionedThroughputExceededCounter pure Nothing handleErr _ = pure Nothing withLocalLock :: (MonadMask m, MonadIO m) => MVar () -> m a -> m a withLocalLock l ma = do (takeMVar l *> ma) `finally` putMVar l () + +{-# NOINLINE optimisticLockGrabAttemptFailedCounter #-} +optimisticLockGrabAttemptFailedCounter :: Prom.Counter +optimisticLockGrabAttemptFailedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "client.opt_lock.optimistic_lock_grab_attempt_failed", + Prom.metricHelp = "Number of times grab attempts for optimisitic lock on prekeys failed" + } + +{-# NOINLINE optimisticLockFailedCounter #-} +optimisticLockFailedCounter :: Prom.Counter +optimisticLockFailedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "client.opt_lock.optimistic_lock_failed", + Prom.metricHelp = "Number of time optimisitic lock on prekeys failed" + } + +{-# NOINLINE dynProvisionedThroughputExceededCounter #-} +dynProvisionedThroughputExceededCounter :: Prom.Counter +dynProvisionedThroughputExceededCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "client.opt_lock.provisioned_throughput_exceeded", + Prom.metricHelp = "Number of times provisioned throughput on DynamoDB was exceeded" + } diff --git a/services/brig/src/Brig/Index/Eval.hs b/services/brig/src/Brig/Index/Eval.hs index 05c5e688882..c19d000c5d9 100644 --- a/services/brig/src/Brig/Index/Eval.hs +++ b/services/brig/src/Brig/Index/Eval.hs @@ -37,7 +37,6 @@ import Data.Aeson (FromJSON) import Data.Aeson qualified as Aeson import Data.ByteString.Lazy.UTF8 qualified as UTF8 import Data.Credentials (Credentials (..)) -import Data.Metrics qualified as Metrics import Database.Bloodhound qualified as ES import Imports import System.Logger qualified as Log @@ -111,8 +110,7 @@ runCommand l = \case additionalCaCert = Nothing } - metricsStorage <- Metrics.metrics - mkIndexEnv esOpts l metricsStorage gly mgr + mkIndexEnv esOpts l gly mgr initES esURI mgr mCreds = let env = ES.mkBHEnv (toESServer esURI) mgr diff --git a/services/brig/src/Brig/Index/Migrations.hs b/services/brig/src/Brig/Index/Migrations.hs index f743f62c157..2fbb8ce5455 100644 --- a/services/brig/src/Brig/Index/Migrations.hs +++ b/services/brig/src/Brig/Index/Migrations.hs @@ -29,7 +29,6 @@ import Control.Lens (to, view, (^.)) import Control.Monad.Catch (MonadThrow, catchAll, finally, throwM) import Data.Aeson (Value, object, (.=)) import Data.Credentials (Credentials (..)) -import Data.Metrics qualified as Metrics import Data.Text qualified as Text import Database.Bloodhound qualified as ES import Imports @@ -87,7 +86,6 @@ mkEnv l es cas galleyEndpoint = do Env envWithAuth <$> initCassandra <*> initLogger - <*> Metrics.metrics <*> pure (view (Opts.esConnection . to Opts.esIndex) es) <*> pure mCreds <*> pure rpcMgr diff --git a/services/brig/src/Brig/Index/Migrations/Types.hs b/services/brig/src/Brig/Index/Migrations/Types.hs index 853570ffb6f..389868c06eb 100644 --- a/services/brig/src/Brig/Index/Migrations/Types.hs +++ b/services/brig/src/Brig/Index/Migrations/Types.hs @@ -25,7 +25,6 @@ import Cassandra qualified as C import Control.Monad.Catch (MonadThrow) import Data.Aeson (FromJSON (..), ToJSON (..), object, withObject, (.:), (.=)) import Data.Credentials (Credentials) -import Data.Metrics (Metrics) import Database.Bloodhound qualified as ES import Imports import Network.HTTP.Client (Manager) @@ -71,7 +70,7 @@ instance MonadIO m => MonadLogger (MigrationActionT m) where instance MonadIO m => Search.MonadIndexIO (MigrationActionT m) where liftIndexIO m = do Env {..} <- ask - let indexEnv = Search.IndexEnv metrics logger bhEnv Nothing searchIndex Nothing Nothing galleyEndpoint httpManager searchIndexCredentials + let indexEnv = Search.IndexEnv logger bhEnv Nothing searchIndex Nothing Nothing galleyEndpoint httpManager searchIndexCredentials Search.runIndexIO indexEnv m instance MonadIO m => ES.MonadBH (MigrationActionT m) where @@ -81,7 +80,6 @@ data Env = Env { bhEnv :: ES.BHEnv, cassandraClientState :: C.ClientState, logger :: Logger.Logger, - metrics :: Metrics, searchIndex :: ES.IndexName, searchIndexCredentials :: Maybe Credentials, httpManager :: Manager, diff --git a/services/brig/src/Brig/Phone.hs b/services/brig/src/Brig/Phone.hs index 9df603e4cc1..e87a46ea739 100644 --- a/services/brig/src/Brig/Phone.hs +++ b/services/brig/src/Brig/Phone.hs @@ -45,11 +45,11 @@ import Control.Lens (view) import Control.Monad.Catch import Control.Retry import Data.LanguageCodes -import Data.Metrics qualified as Metrics import Data.Text qualified as Text import Data.Time.Clock import Imports import Network.HTTP.Client (HttpException, Manager) +import Prometheus qualified as Prom import Ropes.Nexmo qualified as Nexmo import Ropes.Twilio (LookupDetail (..)) import Ropes.Twilio qualified as Twilio @@ -75,7 +75,7 @@ data PhoneException instance Exception PhoneException sendCall :: - (MonadClient m, MonadReader Env m, Log.MonadLogger m) => + (MonadClient m, MonadReader Env m, Log.MonadLogger m, Prom.MonadMonitor m) => Nexmo.Call -> m () sendCall call = unless (isTestPhone $ Nexmo.callTo call) $ do @@ -115,7 +115,8 @@ sendSms :: ( MonadClient m, MonadCatch m, Log.MonadLogger m, - MonadReader Env m + MonadReader Env m, + Prom.MonadMonitor m ) => Locale -> SMSMessage -> @@ -234,7 +235,7 @@ smsBudget = withSmsBudget :: ( MonadClient m, Log.MonadLogger m, - MonadReader Env m + Prom.MonadMonitor m ) => Text -> m a -> @@ -247,7 +248,7 @@ withSmsBudget phone go = do Log.info $ msg (val "SMS budget exhausted.") ~~ field "phone" phone - Metrics.counterIncr (Metrics.path "budget.sms.exhausted") =<< view metrics + Prom.incCounter smsBudgetExhaustedCounter throwM (PhoneBudgetExhausted t) BudgetedValue a b -> do Log.debug $ @@ -269,7 +270,7 @@ callBudget = withCallBudget :: ( MonadClient m, Log.MonadLogger m, - MonadReader Env m + Prom.MonadMonitor m ) => Text -> m a -> @@ -282,7 +283,7 @@ withCallBudget phone go = do Log.info $ msg (val "Voice call budget exhausted.") ~~ field "phone" phone - Metrics.counterIncr (Metrics.path "budget.call.exhausted") =<< view metrics + Prom.incCounter callBudgetExhaustedCounter throwM (PhoneBudgetExhausted t) BudgetedValue a b -> do Log.debug $ @@ -317,3 +318,26 @@ mkPhoneKey orig = x3 :: RetryPolicy x3 = limitRetries 3 <> exponentialBackoff 100000 + +------------------------------------------------------------------------------- +-- Metrics + +{-# NOINLINE callBudgetExhaustedCounter #-} +callBudgetExhaustedCounter :: Prom.Counter +callBudgetExhaustedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "budget.call.exhausted", + Prom.metricHelp = "Number of times budget for calls got exhausted" + } + +{-# NOINLINE smsBudgetExhaustedCounter #-} +smsBudgetExhaustedCounter :: Prom.Counter +smsBudgetExhaustedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "budget.sms.exhausted", + Prom.metricHelp = "Number of times budget for sending SMS got exhausted" + } diff --git a/services/brig/src/Brig/Run.hs b/services/brig/src/Brig/Run.hs index 8e85ea01689..7e31ac802b1 100644 --- a/services/brig/src/Brig/Run.hs +++ b/services/brig/src/Brig/Run.hs @@ -108,7 +108,7 @@ run o = do closeEnv e where endpoint' = brig o - server e = defaultServer (unpack $ endpoint' ^. host) (endpoint' ^. port) (e ^. applog) (e ^. metrics) + server e = defaultServer (unpack $ endpoint' ^. host) (endpoint' ^. port) (e ^. applog) mkApp :: Opts -> IO (Wai.Application, Env) mkApp o = do @@ -242,10 +242,9 @@ pendingActivationCleanup = do collectAuthMetrics :: forall r. AppT r () collectAuthMetrics = do - m <- view metrics env <- view (awsEnv . amazonkaEnv) liftIO $ forever $ do mbRemaining <- readAuthExpiration env - gaugeTokenRemaing m mbRemaining + gaugeTokenRemaing mbRemaining threadDelay 1_000_000 diff --git a/services/brig/src/Brig/User/Auth/Cookie.hs b/services/brig/src/Brig/User/Auth/Cookie.hs index 1bd569bc352..ebed216d947 100644 --- a/services/brig/src/Brig/User/Auth/Cookie.hs +++ b/services/brig/src/Brig/User/Auth/Cookie.hs @@ -52,13 +52,13 @@ import Control.Monad.Except import Data.ByteString.Conversion import Data.Id import Data.List qualified as List -import Data.Metrics qualified as Metrics import Data.Proxy import Data.RetryAfter import Data.Time.Clock import Imports import Network.Wai (Response) import Network.Wai.Utilities.Response (addHeader) +import Prometheus qualified as Prom import System.Logger.Class (field, msg, val, (~~)) import System.Logger.Class qualified as Log import Web.Cookie qualified as WebCookie @@ -104,7 +104,8 @@ nextCookie :: MonadReader Env m, Log.MonadLogger m, ZAuth.MonadZAuth m, - MonadClient m + MonadClient m, + Prom.MonadMonitor m ) => Cookie (ZAuth.Token u) -> Maybe ClientId -> @@ -291,11 +292,20 @@ toWebCookie c = do -------------------------------------------------------------------------------- -- Tracking -trackSuperseded :: (MonadReader Env m, MonadIO m, Log.MonadLogger m) => UserId -> CookieId -> m () +trackSuperseded :: (MonadIO m, Log.MonadLogger m, Prom.MonadMonitor m) => UserId -> CookieId -> m () trackSuperseded u c = do - m <- view metrics - Metrics.counterIncr (Metrics.path "user.auth.cookie.superseded") m + Prom.incCounter cookieSupersededCounter Log.warn $ msg (val "Superseded cookie used") ~~ field "user" (toByteString u) ~~ field "cookie" (cookieIdNum c) + +{-# NOINLINE cookieSupersededCounter #-} +cookieSupersededCounter :: Prom.Counter +cookieSupersededCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.auth.cookie.superseded", + Prom.metricHelp = "Number of times user's cookie got superseded" + } diff --git a/services/brig/src/Brig/User/Phone.hs b/services/brig/src/Brig/User/Phone.hs index f12541ae0aa..f05880eeae5 100644 --- a/services/brig/src/Brig/User/Phone.hs +++ b/services/brig/src/Brig/User/Phone.hs @@ -51,6 +51,7 @@ import Data.Text qualified as Text import Data.Text.Ascii qualified as Ascii import Data.Text.Lazy (toStrict) import Imports +import Prometheus (MonadMonitor) import Ropes.Nexmo qualified as Nexmo import System.Logger.Class qualified as Log import Wire.API.User @@ -62,7 +63,8 @@ sendActivationSms :: ( MonadClient m, MonadReader Env m, MonadCatch m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> ActivationPair -> @@ -77,7 +79,8 @@ sendPasswordResetSms :: ( MonadClient m, MonadReader Env m, MonadCatch m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> PasswordResetPair -> @@ -92,7 +95,8 @@ sendLoginSms :: ( MonadClient m, MonadReader Env m, MonadCatch m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> LoginCode -> @@ -107,7 +111,8 @@ sendDeletionSms :: ( MonadClient m, MonadReader Env m, MonadCatch m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> Code.Key -> @@ -122,7 +127,8 @@ sendDeletionSms to key code loc = do sendActivationCall :: ( MonadClient m, MonadReader Env m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> ActivationPair -> @@ -136,7 +142,8 @@ sendActivationCall to (_, c) loc = do sendLoginCall :: ( MonadClient m, MonadReader Env m, - Log.MonadLogger m + Log.MonadLogger m, + MonadMonitor m ) => Phone -> LoginCode -> diff --git a/services/brig/src/Brig/User/Search/Index.hs b/services/brig/src/Brig/User/Search/Index.hs index 9df5255ce84..b9c098eb4c4 100644 --- a/services/brig/src/Brig/User/Search/Index.hs +++ b/services/brig/src/Brig/User/Search/Index.hs @@ -78,19 +78,19 @@ import Data.Credentials import Data.Handle (Handle) import Data.Id import Data.Map qualified as Map -import Data.Metrics import Data.Text qualified as T import Data.Text qualified as Text import Data.Text.Encoding import Data.Text.Encoding.Error import Data.Text.Lazy qualified as LT -import Data.Text.Lazy.Builder.Int (decimal) import Data.Text.Lens hiding (text) import Data.UUID qualified as UUID import Database.Bloodhound qualified as ES import Imports hiding (log, searchable) import Network.HTTP.Client hiding (host, path, port) import Network.HTTP.Types (StdMethod (POST), hContentType, statusCode) +import Prometheus (MonadMonitor) +import Prometheus qualified as Prom import SAML2.WebSSO.Types qualified as SAML import System.Logger qualified as Log import System.Logger.Class (Logger, MonadLogger (..), field, info, msg, val, (+++), (~~)) @@ -106,8 +106,7 @@ import Wire.API.User.Search (Sso (..)) -- IndexIO Monad data IndexEnv = IndexEnv - { idxMetrics :: Metrics, - idxLogger :: Logger, + { idxLogger :: Logger, idxElastic :: ES.BHEnv, idxRequest :: Maybe RequestId, idxName :: ES.IndexName, @@ -129,7 +128,8 @@ newtype IndexIO a = IndexIO (ReaderT IndexEnv IO a) MonadReader IndexEnv, MonadThrow, MonadCatch, - MonadMask + MonadMask, + MonadMonitor ) runIndexIO :: MonadIO m => IndexEnv -> IndexIO a -> m a @@ -173,15 +173,14 @@ withAdditionalESUrl action = do -------------------------------------------------------------------------------- -- Updates -reindex :: (MonadLogger m, MonadIndexIO m, C.MonadClient m) => UserId -> m () +reindex :: (MonadLogger m, MonadIndexIO m, C.MonadClient m, Prom.MonadMonitor IndexIO) => UserId -> m () reindex u = do ixu <- lookupIndexUser u updateIndex (maybe (IndexDeleteUser u) (IndexUpdateUser IndexUpdateIfNewerVersion) ixu) -updateIndex :: MonadIndexIO m => IndexUpdate -> m () +updateIndex :: (MonadIndexIO m, Prom.MonadMonitor IndexIO) => IndexUpdate -> m () updateIndex (IndexUpdateUser updateType iu) = liftIndexIO $ do - m <- asks idxMetrics - counterIncr (path "user.index.update.count") m + Prom.incCounter indexUpdateCounter info $ field "user" (Bytes.toByteString (view iuUserId iu)) . msg (val "Indexing user") @@ -191,20 +190,18 @@ updateIndex (IndexUpdateUser updateType iu) = liftIndexIO $ do where indexDoc :: (MonadIndexIO m, MonadThrow m) => ES.IndexName -> ES.BH m () indexDoc idx = do - m <- lift . liftIndexIO $ asks idxMetrics r <- ES.indexDocument idx mappingName versioning (indexToDoc iu) docId unless (ES.isSuccess r || ES.isVersionConflict r) $ do - counterIncr (path "user.index.update.err") m + liftIO $ Prom.incCounter indexUpdateErrorCounter ES.parseEsResponse r >>= throwM . IndexUpdateError . either id id - counterIncr (path "user.index.update.ok") m + liftIO $ Prom.incCounter indexUpdateSuccessCounter versioning = ES.defaultIndexDocumentSettings { ES.idsVersionControl = indexUpdateToVersionControl updateType (ES.ExternalDocVersion (docVersion (_iuVersion iu))) } docId = ES.DocId (view (iuUserId . re _TextId) iu) updateIndex (IndexUpdateUsers updateType ius) = liftIndexIO $ do - m <- asks idxMetrics - counterIncr (path "user.index.update.bulk.count") m + Prom.incCounter indexBulkUpdateCounter info $ field "num_users" (length ius) . msg (val "Bulk indexing users") @@ -226,14 +223,11 @@ updateIndex (IndexUpdateUsers updateType ius) = liftIndexIO $ do } (ES.bhManager bhe) unless (ES.isSuccess res) $ do - counterIncr (path "user.index.update.bulk.err") m + Prom.incCounter indexBulkUpdateErrorCounter ES.parseEsResponse res >>= throwM . IndexUpdateError . either id id - counterIncr (path "user.index.update.bulk.ok") m + Prom.incCounter indexBulkUpdateSuccessCounter for_ (statuses res) $ \(s, f) -> - counterAdd - (fromIntegral f) - (path ("user.index.update.bulk.status." <> review builder (decimal s))) - m + Prom.withLabel indexBulkUpdateResponseCounter (Text.pack $ show s) $ (void . flip Prom.addCounter (fromIntegral f)) where mkAuthHeaders = do creds <- asks idxCredentials @@ -261,7 +255,7 @@ updateIndex (IndexUpdateUsers updateType ius) = liftIndexIO $ do . toListOf (key "items" . values . key "index" . key "status" . _Integral) . responseBody updateIndex (IndexDeleteUser u) = liftIndexIO $ do - counterIncr (path "user.index.delete.count") =<< asks idxMetrics + Prom.incCounter indexDeleteCounter info $ field "user" (Bytes.toByteString u) . msg (val "(Soft) deleting user from index") @@ -972,3 +966,87 @@ instance Show ParseException where ++ m instance Exception ParseException + +--------------------------------------------------------------------------------- +-- Metrics + +{-# NOINLINE indexUpdateCounter #-} +indexUpdateCounter :: Prom.Counter +indexUpdateCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.count", + Prom.metricHelp = "Number of updates on user index" + } + +{-# NOINLINE indexUpdateErrorCounter #-} +indexUpdateErrorCounter :: Prom.Counter +indexUpdateErrorCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.err", + Prom.metricHelp = "Number of errors during user index update" + } + +{-# NOINLINE indexUpdateSuccessCounter #-} +indexUpdateSuccessCounter :: Prom.Counter +indexUpdateSuccessCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.ok", + Prom.metricHelp = "Number of successful user index updates" + } + +{-# NOINLINE indexBulkUpdateCounter #-} +indexBulkUpdateCounter :: Prom.Counter +indexBulkUpdateCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.bulk.count", + Prom.metricHelp = "Number of bulk updates on user index" + } + +{-# NOINLINE indexBulkUpdateErrorCounter #-} +indexBulkUpdateErrorCounter :: Prom.Counter +indexBulkUpdateErrorCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.bulk.err", + Prom.metricHelp = "Number of errors during bulk updates on user index" + } + +{-# NOINLINE indexBulkUpdateSuccessCounter #-} +indexBulkUpdateSuccessCounter :: Prom.Counter +indexBulkUpdateSuccessCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.bulk.ok", + Prom.metricHelp = "Number of successful bulk updates on user index" + } + +{-# NOINLINE indexBulkUpdateResponseCounter #-} +indexBulkUpdateResponseCounter :: Prom.Vector Prom.Label1 Prom.Counter +indexBulkUpdateResponseCounter = + Prom.unsafeRegister $ + Prom.vector ("status") $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.update.bulk.response", + Prom.metricHelp = "Number of successful bulk updates on user index" + } + +{-# NOINLINE indexDeleteCounter #-} +indexDeleteCounter :: Prom.Counter +indexDeleteCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "user.index.delete.count", + Prom.metricHelp = "Number of deletes on user index" + } diff --git a/services/cannon/cannon.cabal b/services/cannon/cannon.cabal index e69c1a663ae..73ef7133bef 100644 --- a/services/cannon/cannon.cabal +++ b/services/cannon/cannon.cabal @@ -97,6 +97,7 @@ library , lens-family-core >=1.1 , metrics-wai >=0.4 , mwc-random >=0.13 + , prometheus-client , retry >=0.7 , safe-exceptions , servant-conduit diff --git a/services/cannon/src/Cannon/Run.hs b/services/cannon/src/Cannon/Run.hs index 7ba53d95e9f..ba8256cb62b 100644 --- a/services/cannon/src/Cannon/Run.hs +++ b/services/cannon/src/Cannon/Run.hs @@ -27,7 +27,7 @@ import Cannon.API.Public import Cannon.App (maxPingInterval) import Cannon.Dict qualified as D import Cannon.Options -import Cannon.Types (Cannon, applog, clients, env, mkEnv, monitor, runCannon', runCannonToServant) +import Cannon.Types (Cannon, applog, clients, env, mkEnv, runCannon', runCannonToServant) import Cannon.WS hiding (env) import Control.Concurrent import Control.Concurrent.Async qualified as Async @@ -35,8 +35,6 @@ import Control.Exception qualified as E import Control.Exception.Safe (catchAny) import Control.Lens ((^.)) import Control.Monad.Catch (MonadCatch, finally) -import Data.Metrics.Middleware (gaugeSet, path) -import Data.Metrics.Middleware qualified as Middleware import Data.Metrics.Servant import Data.Proxy import Data.Text (pack, strip) @@ -47,6 +45,7 @@ import Network.Wai qualified as Wai import Network.Wai.Handler.Warp hiding (run) import Network.Wai.Middleware.Gzip qualified as Gzip import Network.Wai.Utilities.Server +import Prometheus qualified as Prom import Servant import System.IO.Strict qualified as Strict import System.Logger.Class qualified as LC @@ -68,16 +67,15 @@ run o = do when (o ^. drainOpts . gracePeriodSeconds == 0) $ error "drainOpts.gracePeriodSeconds must not be set to 0." ext <- loadExternal - m <- Middleware.metrics g <- L.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat) e <- - mkEnv m ext o g + mkEnv ext o g <$> D.empty 128 <*> newManager defaultManagerSettings {managerConnCount = 128} <*> createSystemRandom <*> mkClock refreshMetricsThread <- Async.async $ runCannon' e refreshMetrics - s <- newSettings $ Server (o ^. cannon . host) (o ^. cannon . port) (applog e) m (Just idleTimeout) + s <- newSettings $ Server (o ^. cannon . host) (o ^. cannon . port) (applog e) (Just idleTimeout) let middleware :: Wai.Middleware middleware = @@ -133,11 +131,11 @@ instance Exception SignalledToExit refreshMetrics :: Cannon () refreshMetrics = do - m <- monitor c <- clients safeForever $ do s <- D.size c - gaugeSet (fromIntegral s) (path "net.websocket.clients") m + Prom.setGauge websocketClientsGauge (fromIntegral s) + -- gaugeSet (fromIntegral s) (path "") m liftIO $ threadDelay 1000000 where safeForever :: (MonadIO m, LC.MonadLogger m, MonadCatch m) => m () -> m () @@ -146,3 +144,13 @@ refreshMetrics = do action `catchAny` \exc -> do LC.err $ "error" LC..= show exc LC.~~ LC.msg (LC.val "refreshMetrics failed") liftIO $ threadDelay 60000000 -- pause to keep worst-case noise in logs manageable + +{-# NOINLINE websocketClientsGauge #-} +websocketClientsGauge :: Prom.Gauge +websocketClientsGauge = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "net.websocket.clients", + Prom.metricHelp = "Number of connected websocket clients" + } diff --git a/services/cannon/src/Cannon/Types.hs b/services/cannon/src/Cannon/Types.hs index 31abc52800c..f9d34c5e788 100644 --- a/services/cannon/src/Cannon/Types.hs +++ b/services/cannon/src/Cannon/Types.hs @@ -19,7 +19,6 @@ module Cannon.Types ( Env, - mon, opts, applog, dict, @@ -32,7 +31,6 @@ module Cannon.Types runCannon', options, clients, - monitor, wsenv, runCannonToServant, ) @@ -47,12 +45,12 @@ import Cannon.WS qualified as WS import Control.Concurrent.Async (mapConcurrently) import Control.Lens ((^.)) import Control.Monad.Catch -import Data.Metrics.Middleware import Data.Text.Encoding import Imports import Network.Wai import Network.Wai.Utilities.Request qualified as Wai import Network.Wai.Utilities.Server +import Prometheus import Servant qualified import System.Logger qualified as Logger import System.Logger.Class hiding (info) @@ -62,8 +60,7 @@ import System.Random.MWC (GenIO) -- Cannon monad data Env = Env - { mon :: !Metrics, - opts :: !Opts, + { opts :: !Opts, applog :: !Logger, dict :: !(Dict Key Websocket), reqId :: !RequestId, @@ -80,7 +77,8 @@ newtype Cannon a = Cannon MonadIO, MonadThrow, MonadCatch, - MonadMask + MonadMask, + MonadMonitor ) mapConcurrentlyCannon :: Traversable t => (a -> Cannon b) -> t a -> Cannon (t b) @@ -99,7 +97,6 @@ instance HasRequestId Cannon where getRequestId = Cannon $ asks reqId mkEnv :: - Metrics -> ByteString -> Opts -> Logger -> @@ -108,8 +105,8 @@ mkEnv :: GenIO -> Clock -> Env -mkEnv m external o l d p g t = - Env m o l d (RequestId "N/A") $ +mkEnv external o l d p g t = + Env o l d (RequestId "N/A") $ WS.env external (o ^. cannon . port) (encodeUtf8 $ o ^. gundeck . host) (o ^. gundeck . port) l p d g t (o ^. drainOpts) runCannon :: Env -> Cannon a -> Request -> IO a @@ -127,9 +124,6 @@ options = Cannon $ asks opts clients :: Cannon (Dict Key Websocket) clients = Cannon $ asks dict -monitor :: Cannon Metrics -monitor = Cannon $ asks mon - wsenv :: Cannon WS.Env wsenv = Cannon $ do e <- asks env diff --git a/services/cargohold/cargohold.cabal b/services/cargohold/cargohold.cabal index f3c5ad95c44..2a8a5b2ba93 100644 --- a/services/cargohold/cargohold.cabal +++ b/services/cargohold/cargohold.cabal @@ -115,6 +115,7 @@ library , metrics-core , metrics-wai >=0.4 , mime >=0.4 + , prometheus-client , resourcet >=1.1 , retry >=0.5 , servant diff --git a/services/cargohold/src/CargoHold/App.hs b/services/cargohold/src/CargoHold/App.hs index 36af17c0051..1f334acb8c2 100644 --- a/services/cargohold/src/CargoHold/App.hs +++ b/services/cargohold/src/CargoHold/App.hs @@ -29,7 +29,6 @@ module CargoHold.App multiIngress, httpManager, http2Manager, - metrics, appLogger, requestId, localUnit, @@ -62,8 +61,6 @@ import Control.Lens (Lens', makeLenses, non, view, (?~), (^.)) import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) import Control.Monad.Trans.Resource (ResourceT, runResourceT, transResourceT) import qualified Data.Map as Map -import Data.Metrics.Middleware (Metrics) -import qualified Data.Metrics.Middleware as Metrics import Data.Qualified import HTTP2.Client.Manager (Http2Manager, http2ManagerWithSSLCtx) import Imports hiding (log) @@ -72,6 +69,7 @@ import Network.HTTP.Client.OpenSSL import Network.Wai.Utilities (Error (..)) import OpenSSL.Session (SSLContext, SSLOption (..)) import qualified OpenSSL.Session as SSL +import Prometheus import qualified Servant.Client as Servant import System.Logger.Class hiding (settings) import qualified System.Logger.Extended as Log @@ -84,7 +82,6 @@ import qualified Wire.API.Routes.Internal.Brig as IBrig data Env = Env { _aws :: AWS.Env, - _metrics :: Metrics, _appLogger :: Logger, _httpManager :: Manager, _http2Manager :: Http2Manager, @@ -101,7 +98,6 @@ settings = options . Opt.settings newEnv :: Opts -> IO Env newEnv opts = do - metricsStorage <- Metrics.metrics logger <- Log.mkLogger (opts ^. Opt.logLevel) (opts ^. Opt.logNetStrings) (opts ^. Opt.logFormat) checkOpts opts logger httpMgr <- initHttpManager (opts ^. Opt.aws . Opt.s3Compatibility) @@ -109,7 +105,7 @@ newEnv opts = do awsEnv <- initAws (opts ^. Opt.aws) logger httpMgr multiIngressAWS <- initMultiIngressAWS logger httpMgr let localDomain = toLocalUnsafe (opts ^. Opt.settings . Opt.federationDomain) () - pure $ Env awsEnv metricsStorage logger httpMgr http2Mgr (RequestId "N/A") opts localDomain multiIngressAWS + pure $ Env awsEnv logger httpMgr http2Mgr (RequestId "N/A") opts localDomain multiIngressAWS where initMultiIngressAWS :: Logger -> Manager -> IO (Map String AWS.Env) initMultiIngressAWS logger httpMgr = @@ -205,7 +201,8 @@ newtype AppT m a = AppT (ReaderT Env m a) MonadThrow, MonadCatch, MonadMask, - MonadReader Env + MonadReader Env, + MonadMonitor ) type App = AppT IO diff --git a/services/cargohold/src/CargoHold/Metrics.hs b/services/cargohold/src/CargoHold/Metrics.hs index aa21c396891..34d0c08fca4 100644 --- a/services/cargohold/src/CargoHold/Metrics.hs +++ b/services/cargohold/src/CargoHold/Metrics.hs @@ -17,17 +17,32 @@ module CargoHold.Metrics where -import CargoHold.App (Env, metrics) -import Control.Lens (view) -import Data.Metrics.Middleware (counterAdd, counterIncr, path) import Imports +import qualified Prometheus as Prom -s3UploadOk :: (MonadReader Env m, MonadIO m) => m () -s3UploadOk = - counterIncr (path "net.s3.upload_ok") - =<< view metrics +s3UploadOk :: Prom.MonadMonitor m => m () +s3UploadOk = Prom.incCounter netS3UploadOk -s3UploadSize :: (MonadReader Env m, MonadIO m, Integral n) => n -> m () +{-# NOINLINE netS3UploadOk #-} +netS3UploadOk :: Prom.Counter +netS3UploadOk = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "net.s3.upload_ok", + Prom.metricHelp = "Number of successful S3 Uploads" + } + +s3UploadSize :: (Prom.MonadMonitor m, Integral n) => n -> m () s3UploadSize n = - counterAdd (fromIntegral n) (path "net.s3.upload_size") - =<< view metrics + void $ Prom.addCounter netS3UploadSize (fromIntegral n) + +{-# NOINLINE netS3UploadSize #-} +netS3UploadSize :: Prom.Counter +netS3UploadSize = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "net.s3.upload_size", + Prom.metricHelp = "Number of bytes uploaded successfully uploaded to S3" + } diff --git a/services/cargohold/src/CargoHold/Run.hs b/services/cargohold/src/CargoHold/Run.hs index a3eb898a75c..6106a4c68e9 100644 --- a/services/cargohold/src/CargoHold/Run.hs +++ b/services/cargohold/src/CargoHold/Run.hs @@ -32,7 +32,6 @@ import CargoHold.Options hiding (aws) import Control.Exception (bracket) import Control.Lens ((.~), (^.)) import Control.Monad.Codensity -import Data.Metrics (Metrics) import Data.Metrics.AWS (gaugeTokenRemaing) import Data.Metrics.Servant import Data.Proxy @@ -59,7 +58,7 @@ type CombinedAPI = FederationAPI :<|> CargoholdAPI :<|> InternalAPI run :: Opts -> IO () run o = lowerCodensity $ do (app, e) <- mkApp o - void $ Codensity $ Async.withAsync (collectAuthMetrics (e ^. metrics) (e ^. aws . amazonkaEnv)) + void $ Codensity $ Async.withAsync (collectAuthMetrics (e ^. aws . amazonkaEnv)) liftIO $ do s <- Server.newSettings $ @@ -67,7 +66,6 @@ run o = lowerCodensity $ do (unpack $ o ^. cargohold . host) (o ^. cargohold . port) (e ^. appLogger) - (e ^. metrics) runSettingsWithShutdown s app Nothing mkApp :: Opts -> Codensity IO (Application, Env) @@ -99,10 +97,10 @@ mkApp o = Codensity $ \k -> toServantHandler :: Env -> Handler a -> Servant.Handler a toServantHandler env = liftIO . runHandler env -collectAuthMetrics :: MonadIO m => Metrics -> AWS.Env -> m () -collectAuthMetrics m env = do +collectAuthMetrics :: MonadIO m => AWS.Env -> m () +collectAuthMetrics env = do liftIO $ forever $ do mbRemaining <- readAuthExpiration env - gaugeTokenRemaing m mbRemaining + gaugeTokenRemaing mbRemaining threadDelay 1_000_000 diff --git a/services/federator/federator.cabal b/services/federator/federator.cabal index 1812c4e115b..656ffec5cd2 100644 --- a/services/federator/federator.cabal +++ b/services/federator/federator.cabal @@ -131,7 +131,6 @@ library , imports , kan-extensions , lens - , metrics-core , metrics-wai , mtl , pem diff --git a/services/federator/src/Federator/Env.hs b/services/federator/src/Federator/Env.hs index e15b19f532b..6d13f073ad1 100644 --- a/services/federator/src/Federator/Env.hs +++ b/services/federator/src/Federator/Env.hs @@ -22,7 +22,6 @@ module Federator.Env where import Control.Lens (makeLenses) -import Data.Metrics (Metrics) import Federator.Options (RunSettings) import HTTP2.Client.Manager import Imports @@ -40,8 +39,7 @@ data FederatorMetrics = FederatorMetrics } data Env = Env - { _metrics :: Metrics, - _applog :: LC.Logger, + { _applog :: LC.Logger, _dnsResolver :: Resolver, _runSettings :: RunSettings, _service :: Component -> Endpoint, diff --git a/services/federator/src/Federator/Run.hs b/services/federator/src/Federator/Run.hs index 3ebcb41fbf1..45ae38ad42f 100644 --- a/services/federator/src/Federator/Run.hs +++ b/services/federator/src/Federator/Run.hs @@ -38,7 +38,6 @@ import Control.Concurrent.Async import Control.Exception (bracket) import Control.Lens ((^.)) import Data.Id -import Data.Metrics.Middleware qualified as Metrics import Federator.Env import Federator.ExternalServer (serveInward) import Federator.InternalServer (serveOutward) @@ -91,7 +90,6 @@ run opts = do newEnv :: Opts -> DNS.Resolver -> Log.Logger -> IO Env newEnv o _dnsResolver _applog = do - _metrics <- Metrics.metrics let _requestId = RequestId "N/A" _runSettings = Opt.optSettings o _service Brig = Opt.brig o diff --git a/services/galley/galley.cabal b/services/galley/galley.cabal index b61df20ca8b..0ebe57dc415 100644 --- a/services/galley/galley.cabal +++ b/services/galley/galley.cabal @@ -328,6 +328,7 @@ library , pem , polysemy , polysemy-wire-zoo + , prometheus-client , proto-lens >=0.2 , raw-strings-qq >=1.0 , resourcet >=1.1 diff --git a/services/galley/src/Galley/App.hs b/services/galley/src/Galley/App.hs index 086fac2cad2..4242b7f3cf6 100644 --- a/services/galley/src/Galley/App.hs +++ b/services/galley/src/Galley/App.hs @@ -22,7 +22,6 @@ module Galley.App Env, reqId, options, - monitor, applog, manager, federator, @@ -50,7 +49,6 @@ import Cassandra.Util (initCassandraForService) import Control.Error hiding (err) import Control.Lens hiding ((.=)) import Data.Id -import Data.Metrics.Middleware import Data.Misc import Data.Qualified import Data.Range @@ -162,13 +160,13 @@ validateOptions o = do (Just uri, Nothing) -> pure (Left uri) (Just _, Just _) -> error errMsg -createEnv :: Metrics -> Opts -> Logger -> IO Env -createEnv m o l = do +createEnv :: Opts -> Logger -> IO Env +createEnv o l = do cass <- initCassandra o l mgr <- initHttpManager o h2mgr <- initHttp2Manager codeURIcfg <- validateOptions o - Env (RequestId "N/A") m o l mgr h2mgr (o ^. O.federator) (o ^. O.brig) cass + Env (RequestId "N/A") o l mgr h2mgr (o ^. O.federator) (o ^. O.brig) cass <$> Q.new 16000 <*> initExtEnv <*> maybe (pure Nothing) (fmap Just . Aws.mkEnv l mgr) (o ^. journal) diff --git a/services/galley/src/Galley/Env.hs b/services/galley/src/Galley/Env.hs index 87a0ddbd70f..9d88c703b86 100644 --- a/services/galley/src/Galley/Env.hs +++ b/services/galley/src/Galley/Env.hs @@ -24,7 +24,6 @@ import Cassandra import Control.Lens hiding ((.=)) import Data.ByteString.Conversion (toByteString') import Data.Id -import Data.Metrics.Middleware import Data.Misc (Fingerprint, HttpsUrl, Rsa) import Data.Range import Data.Time.Clock.DiffTime (millisecondsToDiffTime) @@ -52,7 +51,6 @@ data DeleteItem = TeamItem TeamId UserId (Maybe ConnId) -- | Main application environment. data Env = Env { _reqId :: RequestId, - _monitor :: Metrics, _options :: Opts, _applog :: Logger, _manager :: Manager, diff --git a/services/galley/src/Galley/Monad.hs b/services/galley/src/Galley/Monad.hs index 1780f3d827c..f1a30b8b8a0 100644 --- a/services/galley/src/Galley/Monad.hs +++ b/services/galley/src/Galley/Monad.hs @@ -29,6 +29,7 @@ import Galley.Env import Imports hiding (log) import Polysemy import Polysemy.Input +import Prometheus import System.Logger import System.Logger.Class qualified as LC @@ -42,7 +43,8 @@ newtype App a = App {unApp :: ReaderT Env IO a} MonadMask, MonadReader Env, MonadThrow, - MonadUnliftIO + MonadUnliftIO, + MonadMonitor ) runApp :: Env -> App a -> IO a diff --git a/services/galley/src/Galley/Run.hs b/services/galley/src/Galley/Run.hs index 7557497fd23..4ac4bc764ca 100644 --- a/services/galley/src/Galley/Run.hs +++ b/services/galley/src/Galley/Run.hs @@ -32,9 +32,7 @@ import Control.Lens (view, (.~), (^.)) import Control.Monad.Codensity import Data.Aeson qualified as Aeson import Data.ByteString.UTF8 qualified as UTF8 -import Data.Metrics (Metrics) import Data.Metrics.AWS (gaugeTokenRemaing) -import Data.Metrics.Middleware qualified as M import Data.Metrics.Servant import Data.Misc (portNumber) import Data.Singletons @@ -58,6 +56,7 @@ import Network.Wai.Middleware.Gzip qualified as GZip import Network.Wai.Utilities.Error import Network.Wai.Utilities.Request import Network.Wai.Utilities.Server +import Prometheus qualified as Prom import Servant hiding (route) import System.Logger qualified as Log import System.Logger.Extended (mkLogger) @@ -77,10 +76,9 @@ run opts = lowerCodensity $ do (unpack $ opts ^. galley . host) (portNumber $ fromIntegral $ opts ^. galley . port) (env ^. App.applog) - (env ^. monitor) forM_ (env ^. aEnv) $ \aws -> - void $ Codensity $ Async.withAsync $ collectAuthMetrics (env ^. monitor) (aws ^. awsEnv) + void $ Codensity $ Async.withAsync $ collectAuthMetrics (aws ^. awsEnv) void $ Codensity $ Async.withAsync $ runApp env deleteLoop void $ Codensity $ Async.withAsync $ runApp env refreshMetrics @@ -90,8 +88,7 @@ mkApp :: Opts -> Codensity IO (Application, Env) mkApp opts = do logger <- lift $ mkLogger (opts ^. logLevel) (opts ^. logNetStrings) (opts ^. logFormat) - metrics <- lift $ M.metrics - env <- lift $ App.createEnv metrics opts logger + env <- lift $ App.createEnv opts logger lift $ runClient (env ^. cstate) $ versionCheck schemaVersion let middlewares = versionMiddleware (foldMap expandVersionExp (opts ^. settings . disabledAPIVersions)) @@ -179,17 +176,26 @@ type CombinedAPI = refreshMetrics :: App () refreshMetrics = do - m <- view monitor q <- view deleteQueue safeForever "refreshMetrics" $ do n <- Q.len q - M.gaugeSet (fromIntegral n) (M.path "galley.deletequeue.len") m + Prom.setGauge deleteQueueLengthGauge (fromIntegral n) threadDelay 1000000 -collectAuthMetrics :: (MonadIO m) => Metrics -> AWS.Env -> m () -collectAuthMetrics m env = do +{-# NOINLINE deleteQueueLengthGauge #-} +deleteQueueLengthGauge :: Prom.Gauge +deleteQueueLengthGauge = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "galley.deletequeue.len", + Prom.metricHelp = "Length of the galley delete queue" + } + +collectAuthMetrics :: (MonadIO m) => AWS.Env -> m () +collectAuthMetrics env = do liftIO $ forever $ do mbRemaining <- readAuthExpiration env - gaugeTokenRemaing m mbRemaining + gaugeTokenRemaing mbRemaining threadDelay 1_000_000 diff --git a/services/gundeck/gundeck.cabal b/services/gundeck/gundeck.cabal index 28d8753c5e1..e2150a6251c 100644 --- a/services/gundeck/gundeck.cabal +++ b/services/gundeck/gundeck.cabal @@ -143,6 +143,7 @@ library , metrics-wai >=0.5.7 , mtl >=2.2 , network-uri >=2.6 + , prometheus-client , psqueues >=0.2.2 , raw-strings-qq , resourcet >=1.1 @@ -309,7 +310,6 @@ executable gundeck-integration , kan-extensions , lens , lens-aeson - , metrics-wai , network , network-uri , optparse-applicative diff --git a/services/gundeck/src/Gundeck/Env.hs b/services/gundeck/src/Gundeck/Env.hs index b9d5f5c073d..8fc8b78abaf 100644 --- a/services/gundeck/src/Gundeck/Env.hs +++ b/services/gundeck/src/Gundeck/Env.hs @@ -27,7 +27,6 @@ import Control.Concurrent.Async (Async) import Control.Lens (makeLenses, (^.)) import Control.Retry (capDelay, exponentialBackoff) import Data.ByteString.Char8 qualified as BSChar8 -import Data.Metrics.Middleware (Metrics) import Data.Misc (Milliseconds (..)) import Data.Text qualified as Text import Data.Time.Clock @@ -50,7 +49,6 @@ import System.Logger.Extended qualified as Logger data Env = Env { _reqId :: !RequestId, - _monitor :: !Metrics, _options :: !Opts, _applog :: !Logger.Logger, _manager :: !Manager, @@ -67,8 +65,8 @@ makeLenses ''Env schemaVersion :: Int32 schemaVersion = 7 -createEnv :: Metrics -> Opts -> IO ([Async ()], Env) -createEnv m o = do +createEnv :: Opts -> IO ([Async ()], Env) +createEnv o = do l <- Logger.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat) n <- newManager @@ -105,7 +103,7 @@ createEnv m o = do { updateAction = Ms . round . (* 1000) <$> getPOSIXTime } mtbs <- mkThreadBudgetState `mapM` (o ^. settings . maxConcurrentNativePushes) - pure $! (rThread : rAdditionalThreads,) $! Env (RequestId "N/A") m o l n p r rAdditional a io mtbs + pure $! (rThread : rAdditionalThreads,) $! Env (RequestId "N/A") o l n p r rAdditional a io mtbs reqIdMsg :: RequestId -> Logger.Msg -> Logger.Msg reqIdMsg = ("request" Logger..=) . unRequestId diff --git a/services/gundeck/src/Gundeck/Monad.hs b/services/gundeck/src/Gundeck/Monad.hs index 66b234569d3..a3a9207864f 100644 --- a/services/gundeck/src/Gundeck/Monad.hs +++ b/services/gundeck/src/Gundeck/Monad.hs @@ -21,7 +21,6 @@ module Gundeck.Monad ( -- * Environment Env, reqId, - monitor, options, applog, manager, @@ -61,6 +60,7 @@ import Imports import Network.HTTP.Types import Network.Wai import Network.Wai.Utilities +import Prometheus import System.Logger qualified as Log import System.Logger qualified as Logger import System.Logger.Class @@ -84,6 +84,10 @@ newtype Gundeck a = Gundeck MonadUnliftIO ) +-- This can be derived if we resolve the TODO above. +instance MonadMonitor Gundeck where + doIO = liftIO + -- | 'Gundeck' doesn't have an instance for 'MonadRedis' because it contains two -- connections to two redis instances. When using 'WithDefaultRedis', any redis -- operation will only target the default redis instance (configured under diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index b11785fa770..009052623d0 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -108,10 +108,9 @@ instance MonadPushAll Gundeck where -- | Another layer of wrap around 'runWithBudget'. runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a runWithBudget'' budget fallback action = do - metrics <- view monitor view threadBudgetState >>= \case Nothing -> action - Just tbs -> runWithBudget' metrics tbs budget fallback action + Just tbs -> runWithBudget' tbs budget fallback action -- | Abstract over all effects in 'nativeTargets' (for unit testing). class Monad m => MonadNativeTargets m where diff --git a/services/gundeck/src/Gundeck/Push/Native.hs b/services/gundeck/src/Gundeck/Push/Native.hs index 752351340d4..917960c4e7e 100644 --- a/services/gundeck/src/Gundeck/Push/Native.hs +++ b/services/gundeck/src/Gundeck/Push/Native.hs @@ -28,7 +28,6 @@ import Control.Monad.Catch import Data.ByteString.Conversion.To import Data.Id import Data.List1 -import Data.Metrics (counterIncr, path) import Data.Set qualified as Set import Data.Text qualified as Text import Data.UUID qualified as UUID @@ -43,6 +42,7 @@ import Gundeck.Push.Native.Types as Types import Gundeck.Types import Gundeck.Util import Imports +import Prometheus qualified as Prom import System.Logger.Class (MonadLogger, field, msg, val, (.=), (~~)) import System.Logger.Class qualified as Log import UnliftIO (handleAny, mapConcurrently, pooledMapConcurrentlyN_) @@ -60,6 +60,66 @@ push m addrs = do -- parallelizing only chunkSize native pushes at a time Just chunkSize -> pooledMapConcurrentlyN_ chunkSize (push1 m) addrs +{-# NOINLINE nativePushSuccessCounter #-} +nativePushSuccessCounter :: Prom.Counter +nativePushSuccessCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.success", + Prom.metricHelp = "Number of times native pushes were successfully pushed" + } + +{-# NOINLINE nativePushDisabledCounter #-} +nativePushDisabledCounter :: Prom.Counter +nativePushDisabledCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.disabled", + Prom.metricHelp = "Number of times native pushes were not pushed due to a disabled endpoint" + } + +{-# NOINLINE nativePushInvalidCounter #-} +nativePushInvalidCounter :: Prom.Counter +nativePushInvalidCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.invalid", + Prom.metricHelp = "Number of times native pushes were not pushed due to an invalid endpoint" + } + +{-# NOINLINE nativePushTooLargeCounter #-} +nativePushTooLargeCounter :: Prom.Counter +nativePushTooLargeCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.too_large", + Prom.metricHelp = "Number of times native pushes were not pushed due to payload being too large" + } + +{-# NOINLINE nativePushUnauthorizedCounter #-} +nativePushUnauthorizedCounter :: Prom.Counter +nativePushUnauthorizedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.unauthorized", + Prom.metricHelp = "Number of times native pushes were not pushed due to an unauthorized endpoint" + } + +{-# NOINLINE nativePushErrorCounter #-} +nativePushErrorCounter :: Prom.Counter +nativePushErrorCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.native.errors", + Prom.metricHelp = "Number of times native pushes were not pushed due to an unexpected error" + } + push1 :: NativePush -> Address -> Gundeck () push1 = push1' 0 where @@ -86,7 +146,7 @@ push1 = push1' 0 field "user" (toByteString (a ^. addrUser)) ~~ field "notificationId" (toText (npNotificationid m)) ~~ Log.msg (val "Native push success") - view monitor >>= counterIncr (path "push.native.success") + Prom.incCounter nativePushSuccessCounter onDisabled = handleAny (logError a "Failed to cleanup disabled endpoint") $ do Log.info $ @@ -94,13 +154,13 @@ push1 = push1' 0 ~~ field "arn" (toText (a ^. addrEndpoint)) ~~ field "cause" ("EndpointDisabled" :: Text) ~~ msg (val "Removing disabled endpoint and token") - view monitor >>= counterIncr (path "push.native.disabled") + Prom.incCounter nativePushDisabledCounter Data.delete (a ^. addrUser) (a ^. addrTransport) (a ^. addrApp) (a ^. addrToken) onTokenRemoved e <- view awsEnv Aws.execute e (Aws.deleteEndpoint (a ^. addrEndpoint)) onPayloadTooLarge = do - view monitor >>= counterIncr (path "push.native.too_large") + Prom.incCounter nativePushTooLargeCounter Log.warn $ field "user" (toByteString (a ^. addrUser)) ~~ field "arn" (toText (a ^. addrEndpoint)) @@ -112,7 +172,7 @@ push1 = push1' 0 ~~ field "arn" (toText (a ^. addrEndpoint)) ~~ field "cause" ("InvalidEndpoint" :: Text) ~~ msg (val "Invalid ARN. Deleting orphaned push token") - view monitor >>= counterIncr (path "push.native.invalid") + Prom.incCounter nativePushInvalidCounter Data.delete (a ^. addrUser) (a ^. addrTransport) (a ^. addrApp) (a ^. addrToken) onTokenRemoved retryUnauthorisedThreshold = 1 @@ -147,10 +207,10 @@ push1 = push1' 0 ~~ field "arn" (toText (a ^. addrEndpoint)) ~~ field "cause" ("UnauthorisedEndpoint" :: Text) ~~ msg (val "Invalid ARN. Dropping push message.") - view monitor >>= counterIncr (path "push.native.unauthorized") + Prom.incCounter nativePushUnauthorizedCounter onPushException ex = do logError a "Native push failed" ex - view monitor >>= counterIncr (path "push.native.errors") + Prom.incCounter nativePushErrorCounter onTokenRemoved = do i <- mkNotificationId let c = a ^. addrClient diff --git a/services/gundeck/src/Gundeck/Push/Websocket.hs b/services/gundeck/src/Gundeck/Push/Websocket.hs index 64a51c5f9d9..e6b8f2121b5 100644 --- a/services/gundeck/src/Gundeck/Push/Websocket.hs +++ b/services/gundeck/src/Gundeck/Push/Websocket.hs @@ -36,7 +36,6 @@ import Data.ByteString.Lazy qualified as L import Data.Id import Data.List1 import Data.Map qualified as Map -import Data.Metrics qualified as Metrics import Data.Misc (Milliseconds (..)) import Data.Set qualified as Set import Data.Time.Clock.POSIX @@ -49,6 +48,7 @@ import Network.HTTP.Client (HttpExceptionContent (..)) import Network.HTTP.Client.Internal qualified as Http import Network.HTTP.Types (StdMethod (POST), status200, status410) import Network.URI qualified as URI +import Prometheus qualified as Prom import System.Logger.Class (val, (+++), (~~)) import System.Logger.Class qualified as Log import UnliftIO (handleAny, mapConcurrently) @@ -101,14 +101,21 @@ bulkPush notifs = do -- | log all cannons with response status @/= 200@. monitorBadCannons :: - (MonadIO m, MonadReader Env m) => + (Prom.MonadMonitor m) => (uri, (error, [Presence])) -> m () -monitorBadCannons (_uri, (_err, prcs)) = do - view monitor - >>= Metrics.counterAdd - (fromIntegral $ length prcs) - (Metrics.path "push.ws.unreachable") +monitorBadCannons (_uri, (_err, prcs)) = + void $ Prom.addCounter pushWsUnreachableCounter (fromIntegral $ length prcs) + +{-# NOINLINE pushWsUnreachableCounter #-} +pushWsUnreachableCounter :: Prom.Counter +pushWsUnreachableCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "push.ws.unreachable", + Prom.metricHelp = "Number of times websocket pushes were not pushed due cannon being unreachable" + } logBadCannons :: Log.MonadLogger m => (URI, (SomeException, [Presence])) -> m () logBadCannons (uri, (err, prcs)) = do @@ -343,7 +350,7 @@ push notif (toList -> tgts) originUser originConn conns = do Log.debug $ logPresence p ~~ Log.msg (val "WebSocket presence gone") pure (ok, p : gone) onResult (ok, gone) (PushFailure p _) = do - view monitor >>= Metrics.counterIncr (Metrics.path "push.ws.unreachable") + Prom.incCounter pushWsUnreachableCounter Log.info $ logPresence p ~~ Log.field "created_at" (ms $ createdAt p) diff --git a/services/gundeck/src/Gundeck/Run.hs b/services/gundeck/src/Gundeck/Run.hs index 6702c9045d4..8dcfc3b764c 100644 --- a/services/gundeck/src/Gundeck/Run.hs +++ b/services/gundeck/src/Gundeck/Run.hs @@ -26,9 +26,7 @@ import Control.Error (ExceptT (ExceptT)) import Control.Exception (finally) import Control.Lens ((.~), (^.)) import Control.Monad.Extra -import Data.Metrics (Metrics) import Data.Metrics.AWS (gaugeTokenRemaing) -import Data.Metrics.Middleware (metrics) import Data.Metrics.Middleware.Prometheus (waiPrometheusMiddleware) import Data.Proxy (Proxy (Proxy)) import Data.Text (unpack) @@ -59,17 +57,16 @@ import Wire.API.Routes.Version.Wai run :: Opts -> IO () run o = do - m <- metrics - (rThreads, e) <- createEnv m o + (rThreads, e) <- createEnv o runClient (e ^. cstate) $ versionCheck schemaVersion let l = e ^. applog - s <- newSettings $ defaultServer (unpack $ o ^. gundeck . host) (o ^. gundeck . port) l m + s <- newSettings $ defaultServer (unpack $ o ^. gundeck . host) (o ^. gundeck . port) l let throttleMillis = fromMaybe defSqsThrottleMillis $ o ^. (settings . sqsThrottleMillis) lst <- Async.async $ Aws.execute (e ^. awsEnv) (Aws.listen throttleMillis (runDirect e . onEvent)) - wtbs <- forM (e ^. threadBudgetState) $ \tbs -> Async.async $ runDirect e $ watchThreadBudgetState m tbs 10 - wCollectAuth <- Async.async (collectAuthMetrics m (Aws._awsEnv (Env._awsEnv e))) + wtbs <- forM (e ^. threadBudgetState) $ \tbs -> Async.async $ runDirect e $ watchThreadBudgetState tbs 10 + wCollectAuth <- Async.async (collectAuthMetrics (Aws._awsEnv (Env._awsEnv e))) let app = middleware e $ mkApp e runSettingsWithShutdown s app Nothing `finally` do @@ -113,10 +110,10 @@ servantSitemap' env = Servant.hoistServer (Proxy @GundeckAPI) toServantHandler s toServantHandler :: Gundeck a -> Handler a toServantHandler m = Handler . ExceptT $ Right <$> runDirect env m -collectAuthMetrics :: MonadIO m => Metrics -> AWS.Env -> m () -collectAuthMetrics m env = do +collectAuthMetrics :: MonadIO m => AWS.Env -> m () +collectAuthMetrics env = do liftIO $ forever $ do mbRemaining <- readAuthExpiration env - gaugeTokenRemaing m mbRemaining + gaugeTokenRemaing mbRemaining threadDelay 1_000_000 diff --git a/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs b/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs index 4f311bb072c..cccfea4fdf6 100644 --- a/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs +++ b/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs @@ -25,14 +25,14 @@ import Control.Exception.Safe (catchAny) import Control.Lens import Control.Monad.Catch (MonadCatch) import Data.HashMap.Strict qualified as HM -import Data.Metrics (Metrics, counterIncr) -import Data.Metrics.Middleware (gaugeSet, path) import Data.Set qualified as Set import Data.Time import Data.UUID (UUID, toText) import Data.UUID.V4 (nextRandom) import Gundeck.Options import Imports +import Prometheus (MonadMonitor) +import Prometheus qualified as Prom import System.Logger.Class qualified as LC import UnliftIO.Async import UnliftIO.Exception (finally) @@ -112,26 +112,24 @@ unregister ref key = -- update the budget. runWithBudget :: forall m. - (LC.MonadLogger m, MonadUnliftIO m) => - Metrics -> + (LC.MonadLogger m, MonadUnliftIO m, MonadMonitor m) => ThreadBudgetState -> Int -> m () -> m () -runWithBudget metrics tbs spent = runWithBudget' metrics tbs spent () +runWithBudget tbs spent = runWithBudget' tbs spent () -- | More flexible variant of 'runWithBudget' that allows the action to return a value. With -- a default in case of budget exhaustion. runWithBudget' :: forall m a. - (MonadIO m, LC.MonadLogger m, MonadUnliftIO m) => - Metrics -> + (MonadIO m, LC.MonadLogger m, MonadUnliftIO m, MonadMonitor m) => ThreadBudgetState -> Int -> a -> m a -> m a -runWithBudget' metrics (ThreadBudgetState limits ref) spent fallback action = do +runWithBudget' (ThreadBudgetState limits ref) spent fallback action = do key <- liftIO nextRandom (`finally` unregister ref key) $ do oldsize <- allocate ref key spent @@ -155,9 +153,12 @@ runWithBudget' metrics (ThreadBudgetState limits ref) spent fallback action = do warnNoBudget :: Bool -> Bool -> Int -> m () warnNoBudget False False _ = pure () warnNoBudget soft' hard' oldsize = do - let limit = if hard' then "hard" else "soft" - metric = "net.nativepush." <> limit <> "_limit_breached" - counterIncr (path metric) metrics + let limit :: ByteString = if hard' then "hard" else "soft" + counter = + if hard' + then threadBudgetHardLimitBreachedCounter + else threadBudgetSoftLimitBreachedCounter + Prom.incCounter counter LC.warn $ "spent" LC..= show oldsize LC.~~ "soft-breach" LC..= soft' @@ -174,30 +175,78 @@ runWithBudget' metrics (ThreadBudgetState limits ref) spent fallback action = do -- Also, issue some metrics. watchThreadBudgetState :: forall m. - (MonadIO m, LC.MonadLogger m, MonadCatch m) => - Metrics -> + (MonadIO m, LC.MonadLogger m, MonadCatch m, MonadMonitor m) => ThreadBudgetState -> NominalDiffTime -> m () -watchThreadBudgetState metrics (ThreadBudgetState limits ref) freq = safeForever $ do - recordMetrics metrics limits ref +watchThreadBudgetState (ThreadBudgetState limits ref) freq = safeForever $ do + recordMetrics limits ref removeStaleHandles ref threadDelayNominalDiffTime freq recordMetrics :: forall m. - MonadIO m => - Metrics -> + (MonadIO m, MonadMonitor m) => MaxConcurrentNativePushes -> IORef BudgetMap -> m () -recordMetrics metrics limits ref = do +recordMetrics limits ref = do (BudgetMap spent _) <- readIORef ref - gaugeSet (fromIntegral spent) (path "net.nativepush.thread_budget_allocated") metrics + Prom.setGauge threadBudgetAllocatedGauge (fromIntegral spent) forM_ (limits ^. hard) $ \lim -> - gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_hard_limit") metrics + Prom.setGauge threadBudgetHardLimitGauge (fromIntegral lim) forM_ (limits ^. soft) $ \lim -> - gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_soft_limit") metrics + Prom.setGauge threadBudgetSoftLimitGauge (fromIntegral lim) + +{-# NOINLINE threadBudgetAllocatedGauge #-} +threadBudgetAllocatedGauge :: Prom.Gauge +threadBudgetAllocatedGauge = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "net.nativepush.thread_budget_allocated", + Prom.metricHelp = "Number of allocated threads for native pushes" + } + +{-# NOINLINE threadBudgetHardLimitGauge #-} +threadBudgetHardLimitGauge :: Prom.Gauge +threadBudgetHardLimitGauge = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "net.nativepush.thread_budget_hard_limit", + Prom.metricHelp = "Hard limit for threads for native pushes" + } + +{-# NOINLINE threadBudgetSoftLimitGauge #-} +threadBudgetSoftLimitGauge :: Prom.Gauge +threadBudgetSoftLimitGauge = + Prom.unsafeRegister $ + Prom.gauge + Prom.Info + { Prom.metricName = "net.nativepush.thread_budget_soft_limit", + Prom.metricHelp = "Soft limit for threads for native pushes" + } + +{-# NOINLINE threadBudgetHardLimitBreachedCounter #-} +threadBudgetHardLimitBreachedCounter :: Prom.Counter +threadBudgetHardLimitBreachedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "net.nativepush.thread_budget_hard_limit_breached", + Prom.metricHelp = "Number of times hard limit for threads for native pushes was breached" + } + +{-# NOINLINE threadBudgetSoftLimitBreachedCounter #-} +threadBudgetSoftLimitBreachedCounter :: Prom.Counter +threadBudgetSoftLimitBreachedCounter = + Prom.unsafeRegister $ + Prom.counter + Prom.Info + { Prom.metricName = "net.nativepush.thread_budget_soft_limit_breached", + Prom.metricHelp = "Number of times soft limit for threads for native pushes was breached" + } threadDelayNominalDiffTime :: NominalDiffTime -> MonadIO m => m () threadDelayNominalDiffTime = threadDelay . round . (* 1000000) . toRational diff --git a/services/gundeck/test/integration/Util.hs b/services/gundeck/test/integration/Util.hs index b28aa32b50a..0bce9203d72 100644 --- a/services/gundeck/test/integration/Util.hs +++ b/services/gundeck/test/integration/Util.hs @@ -8,7 +8,6 @@ import Control.Lens import Control.Monad.Catch import Control.Monad.Codensity import Data.ByteString qualified as S -import Data.Metrics.Middleware (metrics) import Data.Text qualified as Text import Gundeck.Env (createEnv) import Gundeck.Options @@ -23,8 +22,7 @@ withSettingsOverrides :: (Opts -> Opts) -> TestM a -> TestM a withSettingsOverrides f action = do ts <- ask let opts = f (view tsOpts ts) - m <- metrics - (_rThreads, env) <- liftIO $ createEnv m opts + (_rThreads, env) <- liftIO $ createEnv opts liftIO . lowerCodensity $ do let app = mkApp env p <- withMockServer app diff --git a/services/gundeck/test/unit/ThreadBudget.hs b/services/gundeck/test/unit/ThreadBudget.hs index f9f21656aa3..0627c91d436 100644 --- a/services/gundeck/test/unit/ThreadBudget.hs +++ b/services/gundeck/test/unit/ThreadBudget.hs @@ -29,7 +29,6 @@ module ThreadBudget where import Control.Concurrent.Async import Control.Lens import Control.Monad.Catch (MonadCatch, catch) -import Data.Metrics.Middleware (metrics) import Data.String.Conversions import Data.Time import GHC.Generics @@ -127,17 +126,15 @@ burstActions :: NumberOfThreads -> (MonadIO m) => m () burstActions tbs logHistory howlong (NumberOfThreads howmany) = do - mtr <- metrics - let budgeted = runWithBudget mtr tbs 1 (delayms howlong) + let budgeted = runWithBudget tbs 1 (delayms howlong) liftIO . replicateM_ howmany . forkIO $ runReaderT budgeted logHistory -- | Start a watcher with given params and a frequency of 10 milliseconds, so we are more -- likely to find weird race conditions. mkWatcher :: ThreadBudgetState -> LogHistory -> IO (Async ()) mkWatcher tbs logHistory = do - mtr <- metrics async $ - runReaderT (watchThreadBudgetState mtr tbs 0.01) logHistory + runReaderT (watchThreadBudgetState tbs 0.01) logHistory `catch` \AsyncCancelled -> pure () ---------------------------------------------------------------------- diff --git a/services/proxy/src/Proxy/Env.hs b/services/proxy/src/Proxy/Env.hs index d8850dab273..d429787d1be 100644 --- a/services/proxy/src/Proxy/Env.hs +++ b/services/proxy/src/Proxy/Env.hs @@ -22,7 +22,6 @@ module Proxy.Env createEnv, destroyEnv, reqId, - monitor, options, applog, manager, @@ -34,7 +33,6 @@ import Control.Lens (makeLenses, (^.)) import Data.Configurator import Data.Configurator.Types import Data.Id (RequestId (..)) -import Data.Metrics.Middleware (Metrics) import Imports import Network.HTTP.Client import Network.HTTP.Client.TLS (tlsManagerSettings) @@ -43,7 +41,6 @@ import System.Logger.Extended qualified as Logger data Env = Env { _reqId :: !RequestId, - _monitor :: !Metrics, _options :: !Opts, _applog :: !Logger.Logger, _manager :: !Manager, @@ -53,8 +50,8 @@ data Env = Env makeLenses ''Env -createEnv :: Metrics -> Opts -> IO Env -createEnv m o = do +createEnv :: Opts -> IO Env +createEnv o = do g <- Logger.mkLogger (o ^. logLevel) (o ^. logNetStrings) (o ^. logFormat) n <- newManager @@ -66,7 +63,7 @@ createEnv m o = do let ac = AutoConfig 60 (reloadError g) (c, t) <- autoReload ac [Required $ o ^. secretsConfig] let rid = RequestId "N/A" - pure $! Env rid m o g n c t + pure $! Env rid o g n c t where reloadError g x = Logger.err g (Logger.msg $ Logger.val "Failed reloading config: " Logger.+++ show x) diff --git a/services/proxy/src/Proxy/Run.hs b/services/proxy/src/Proxy/Run.hs index 95881999e95..16d43994006 100644 --- a/services/proxy/src/Proxy/Run.hs +++ b/services/proxy/src/Proxy/Run.hs @@ -22,7 +22,6 @@ where import Control.Lens hiding ((.=)) import Control.Monad.Catch -import Data.Metrics.Middleware hiding (path) import Data.Metrics.Middleware.Prometheus (waiPrometheusMiddleware) import Imports hiding (head) import Network.Wai.Middleware.Gunzip qualified as GZip @@ -36,9 +35,8 @@ import Wire.API.Routes.Version.Wai run :: Opts -> IO () run o = do - m <- metrics - e <- createEnv m o - s <- newSettings $ defaultServer (o ^. host) (o ^. port) (e ^. applog) m + e <- createEnv o + s <- newSettings $ defaultServer (o ^. host) (o ^. port) (e ^. applog) let rtree = compile (sitemap e) let app r k = runProxy e r (route rtree r k) let middleware = diff --git a/tools/stern/src/Stern/API.hs b/tools/stern/src/Stern/API.hs index a20aa359db9..154b861e8ab 100644 --- a/tools/stern/src/Stern/API.hs +++ b/tools/stern/src/Stern/API.hs @@ -84,7 +84,7 @@ start o = do Server.runSettingsWithShutdown s (requestIdMiddleware (e ^. applog) defaultRequestIdHeaderName $ servantApp e) Nothing where server :: Env -> Server.Server - server e = Server.defaultServer (unpack $ stern o ^. host) (stern o ^. port) (e ^. applog) (e ^. metrics) + server e = Server.defaultServer (unpack $ stern o ^. host) (stern o ^. port) (e ^. applog) servantApp :: Env -> Application servantApp e0 req cont = do diff --git a/tools/stern/src/Stern/App.hs b/tools/stern/src/Stern/App.hs index eccffa864f0..6042f6b88c5 100644 --- a/tools/stern/src/Stern/App.hs +++ b/tools/stern/src/Stern/App.hs @@ -33,7 +33,6 @@ import Control.Monad.Reader.Class import Control.Monad.Trans.Class import Data.ByteString.Conversion (toByteString') import Data.Id -import Data.Metrics.Middleware qualified as Metrics import Data.Text.Encoding (encodeUtf8) import Data.UUID (toString) import Data.UUID.V4 qualified as UUID @@ -59,7 +58,6 @@ data Env = Env _ibis :: !Bilge.Request, _galeb :: !Bilge.Request, _applog :: !Logger, - _metrics :: !Metrics.Metrics, _requestId :: !Bilge.RequestId, _httpManager :: !Bilge.Manager } @@ -68,9 +66,8 @@ makeLenses ''Env newEnv :: Opts -> IO Env newEnv o = do - mt <- Metrics.metrics l <- Log.mkLogger (O.logLevel o) (O.logNetStrings o) (O.logFormat o) - Env (mkRequest $ O.brig o) (mkRequest $ O.galley o) (mkRequest $ O.gundeck o) (mkRequest $ O.ibis o) (mkRequest $ O.galeb o) l mt (RequestId "N/A") + Env (mkRequest $ O.brig o) (mkRequest $ O.galley o) (mkRequest $ O.gundeck o) (mkRequest $ O.ibis o) (mkRequest $ O.galeb o) l (RequestId "N/A") <$> newManager where mkRequest s = Bilge.host (encodeUtf8 (s ^. host)) . Bilge.port (s ^. port) $ Bilge.empty diff --git a/tools/stern/stern.cabal b/tools/stern/stern.cabal index 9d3634cccc2..e7572f7c330 100644 --- a/tools/stern/stern.cabal +++ b/tools/stern/stern.cabal @@ -87,7 +87,6 @@ library , http-types >=0.8 , imports , lens >=4.4 - , metrics-wai >=0.3 , mtl >=2.1 , openapi3 , schema-profunctor From 6896525c9aed0d6026cb21a276d6898c88147c80 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 12 Jun 2024 10:29:18 +0200 Subject: [PATCH 3/5] federator: Enable GC metrics --- services/federator/federator.cabal | 1 + services/federator/src/Federator/Run.hs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/services/federator/federator.cabal b/services/federator/federator.cabal index 656ffec5cd2..1812c4e115b 100644 --- a/services/federator/federator.cabal +++ b/services/federator/federator.cabal @@ -131,6 +131,7 @@ library , imports , kan-extensions , lens + , metrics-core , metrics-wai , mtl , pem diff --git a/services/federator/src/Federator/Run.hs b/services/federator/src/Federator/Run.hs index 45ae38ad42f..c02d9f25f7d 100644 --- a/services/federator/src/Federator/Run.hs +++ b/services/federator/src/Federator/Run.hs @@ -38,6 +38,7 @@ import Control.Concurrent.Async import Control.Exception (bracket) import Control.Lens ((^.)) import Data.Id +import Data.Metrics.GC import Federator.Env import Federator.ExternalServer (serveInward) import Federator.InternalServer (serveOutward) @@ -59,6 +60,7 @@ import Wire.Network.DNS.Helper qualified as DNS -- FUTUREWORK(federation): Add metrics and status endpoints run :: Opts -> IO () run opts = do + spawnGCMetricsCollector let resolvConf = mkResolvConf (optSettings opts) DNS.defaultResolvConf DNS.withCachingResolver resolvConf $ \res -> do logger <- LogExt.mkLogger (Opt.logLevel opts) (Opt.logNetStrings opts) (Opt.logFormat opts) From b6d253bb880e7d17e0c9fdc62975acda8e515833 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 12 Jun 2024 10:30:24 +0200 Subject: [PATCH 4/5] regen nix --- libs/metrics-core/default.nix | 8 -------- libs/metrics-wai/default.nix | 4 ---- libs/wai-utilities/default.nix | 2 -- services/background-worker/default.nix | 2 -- services/brig/default.nix | 2 ++ services/cannon/default.nix | 2 ++ services/cargohold/default.nix | 2 ++ services/galley/default.nix | 2 ++ services/gundeck/default.nix | 3 ++- tools/stern/default.nix | 2 -- 10 files changed, 10 insertions(+), 19 deletions(-) diff --git a/libs/metrics-core/default.nix b/libs/metrics-core/default.nix index f3eab69051a..b7e369144f6 100644 --- a/libs/metrics-core/default.nix +++ b/libs/metrics-core/default.nix @@ -4,16 +4,12 @@ # dependencies are added or removed. { mkDerivation , base -, containers , gitignoreSource -, hashable , immortal , imports , lib , prometheus-client -, text , time -, unordered-containers }: mkDerivation { pname = "metrics-core"; @@ -21,14 +17,10 @@ mkDerivation { src = gitignoreSource ./.; libraryHaskellDepends = [ base - containers - hashable immortal imports prometheus-client - text time - unordered-containers ]; description = "Metrics core"; license = lib.licenses.agpl3Only; diff --git a/libs/metrics-wai/default.nix b/libs/metrics-wai/default.nix index eb65cf447ae..eb3a260e929 100644 --- a/libs/metrics-wai/default.nix +++ b/libs/metrics-wai/default.nix @@ -9,10 +9,8 @@ , gitignoreSource , hspec , hspec-discover -, http-types , imports , lib -, metrics-core , servant , servant-multipart , text @@ -30,9 +28,7 @@ mkDerivation { base bytestring containers - http-types imports - metrics-core servant servant-multipart text diff --git a/libs/wai-utilities/default.nix b/libs/wai-utilities/default.nix index 1c893b72f76..db4b25e4fb6 100644 --- a/libs/wai-utilities/default.nix +++ b/libs/wai-utilities/default.nix @@ -19,7 +19,6 @@ , kan-extensions , lib , metrics-core -, metrics-wai , openapi3 , pipes , prometheus-client @@ -55,7 +54,6 @@ mkDerivation { imports kan-extensions metrics-core - metrics-wai openapi3 pipes prometheus-client diff --git a/services/background-worker/default.nix b/services/background-worker/default.nix index 3698011087d..6ccf66f8ac7 100644 --- a/services/background-worker/default.nix +++ b/services/background-worker/default.nix @@ -21,7 +21,6 @@ , http2-manager , imports , lib -, metrics-core , metrics-wai , monad-control , prometheus-client @@ -60,7 +59,6 @@ mkDerivation { http-client http2-manager imports - metrics-core metrics-wai monad-control prometheus-client diff --git a/services/brig/default.nix b/services/brig/default.nix index a7c6a8cfdb9..a9e10f5c07d 100644 --- a/services/brig/default.nix +++ b/services/brig/default.nix @@ -95,6 +95,7 @@ , polysemy-wire-zoo , postie , process +, prometheus-client , proto-lens , QuickCheck , random @@ -243,6 +244,7 @@ mkDerivation { polysemy-plugin polysemy-time polysemy-wire-zoo + prometheus-client proto-lens random-shuffle raw-strings-qq diff --git a/services/cannon/default.nix b/services/cannon/default.nix index 2161483f93f..9278d2c1c94 100644 --- a/services/cannon/default.nix +++ b/services/cannon/default.nix @@ -26,6 +26,7 @@ , lib , metrics-wai , mwc-random +, prometheus-client , QuickCheck , random , retry @@ -77,6 +78,7 @@ mkDerivation { lens-family-core metrics-wai mwc-random + prometheus-client retry safe-exceptions servant-conduit diff --git a/services/cargohold/default.nix b/services/cargohold/default.nix index 58b2e770a30..32c9e73b371 100644 --- a/services/cargohold/default.nix +++ b/services/cargohold/default.nix @@ -43,6 +43,7 @@ , mmorph , mtl , optparse-applicative +, prometheus-client , resourcet , retry , safe @@ -108,6 +109,7 @@ mkDerivation { metrics-core metrics-wai mime + prometheus-client resourcet retry servant diff --git a/services/galley/default.nix b/services/galley/default.nix index 26be21dac96..e85dbccf854 100644 --- a/services/galley/default.nix +++ b/services/galley/default.nix @@ -68,6 +68,7 @@ , polysemy , polysemy-wire-zoo , process +, prometheus-client , proto-lens , protobuf , QuickCheck @@ -179,6 +180,7 @@ mkDerivation { pem polysemy polysemy-wire-zoo + prometheus-client proto-lens raw-strings-qq resourcet diff --git a/services/gundeck/default.nix b/services/gundeck/default.nix index 4fe37c9149d..b925700365e 100644 --- a/services/gundeck/default.nix +++ b/services/gundeck/default.nix @@ -47,6 +47,7 @@ , network , network-uri , optparse-applicative +, prometheus-client , psqueues , QuickCheck , quickcheck-instances @@ -123,6 +124,7 @@ mkDerivation { metrics-wai mtl network-uri + prometheus-client psqueues raw-strings-qq resourcet @@ -168,7 +170,6 @@ mkDerivation { kan-extensions lens lens-aeson - metrics-wai network network-uri optparse-applicative diff --git a/tools/stern/default.nix b/tools/stern/default.nix index 5c9adf4ce7d..8ccf0f63f20 100644 --- a/tools/stern/default.nix +++ b/tools/stern/default.nix @@ -24,7 +24,6 @@ , lens , lens-aeson , lib -, metrics-wai , mtl , openapi3 , optparse-applicative @@ -74,7 +73,6 @@ mkDerivation { http-types imports lens - metrics-wai mtl openapi3 schema-profunctor From 834ef5b398794109551f5d6c5cfd9b441f85b851 Mon Sep 17 00:00:00 2001 From: Akshay Mankar Date: Wed, 12 Jun 2024 10:32:22 +0200 Subject: [PATCH 5/5] Changelog --- changelog.d/5-internal/federator-metrics | 1 + changelog.d/5-internal/metrics-core | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/5-internal/federator-metrics create mode 100644 changelog.d/5-internal/metrics-core diff --git a/changelog.d/5-internal/federator-metrics b/changelog.d/5-internal/federator-metrics new file mode 100644 index 00000000000..d2453989684 --- /dev/null +++ b/changelog.d/5-internal/federator-metrics @@ -0,0 +1 @@ +federator: Add metrics for garbage collections and unexpected errors that were caught \ No newline at end of file diff --git a/changelog.d/5-internal/metrics-core b/changelog.d/5-internal/metrics-core new file mode 100644 index 00000000000..f9b39a5a634 --- /dev/null +++ b/changelog.d/5-internal/metrics-core @@ -0,0 +1 @@ +metrics-core: Delete `Data.Metrics` in favour of defining metrics closer to where they are being emitted \ No newline at end of file