From de603a385516bbdd407396c381d71e040c7df301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Makie=C5=82a?= Date: Mon, 22 Jun 2020 19:05:07 +0200 Subject: [PATCH] Add healthcheck endpoint * Added healthcheck controller * Added tests * Created documentation for this endpoint * Updated Sparrow dependency --- README.md | 27 +++ .../controllers/healthcheck_controller.ex | 58 +++++++ lib/mongoose_push_web/router.ex | 1 + mix.exs | 2 +- mix.lock | 2 +- test/support/api.ex | 2 +- test/unit/healthcheck_test.exs | 159 ++++++++++++++++++ 7 files changed, 248 insertions(+), 3 deletions(-) create mode 100644 lib/mongoose_push_web/controllers/healthcheck_controller.ex create mode 100644 test/unit/healthcheck_test.exs diff --git a/README.md b/README.md index cfe2f9e7..83417055 100644 --- a/README.md +++ b/README.md @@ -499,6 +499,33 @@ If you use dockerized MongoosePush, you need to do the following: * Modify the `sys.config` as you see fit (for metrics, see above) * Stop MongoosePush docker container and restart it with the modified `sys.config` as volume in `/opt/app/sys.config` (yes, this is not the path we used to copy this file from, this is an override) +### Healthcheck + +MongoosePush exposes a `/healthcheck` endpoint, from which you can get information about the current status of connections in `JSON` format, grouped by connection pool. An example with 2 pools, one connected to the service and the other not, would look like this: + +```json +[ +{ + "pool": "pool_name1", + "connection_status": + [ + "connected", + "connected" + ] +}, +{ + "pool": "pool_name2", + "connection_status": + [ + "disconnected", + "disconnected", + "disconnected" + ] +} +] +``` + +Please note that it's not recommended to use this frequently, as it puts extra load on the worker processes. 
### Available metrics
diff --git a/lib/mongoose_push_web/controllers/healthcheck_controller.ex b/lib/mongoose_push_web/controllers/healthcheck_controller.ex
new file mode 100644
index 00000000..b0be8f94
--- /dev/null
+++ b/lib/mongoose_push_web/controllers/healthcheck_controller.ex
@@ -0,0 +1,61 @@
+defmodule MongoosePushWeb.HealthcheckController do
+  @moduledoc """
+  Exposes the connection status of every worker in every `:wpool` pool.
+
+  The JSON payload is a list with one entry per pool, holding the pool name
+  and one `connected`/`disconnected` status per worker.
+  """
+
+  use MongoosePushWeb, :controller
+
+  @doc """
+  Handles `GET /healthcheck`.
+
+  Replies with HTTP 503 when every worker of every pool is disconnected (which
+  is also the case when no pool is reported at all) and with HTTP 200 otherwise.
+  """
+  def send(conn = %Plug.Conn{}, %{}) do
+    payload = Enum.map(:wpool.stats(), &extract_connection_info_from_pool/1)
+
+    conn
+    |> put_status(get_status(payload))
+    |> json(payload)
+  end
+
+  # Builds the `%{pool: name, connection_status: [...]}` entry for one element
+  # of `:wpool.stats/0`.
+  defp extract_connection_info_from_pool(pool) do
+    # The workers are the children of the pool's `wpool_process_sup` supervisor.
+    {_, sup_pid, _, _} =
+      pool[:supervisor]
+      |> Supervisor.which_children()
+      |> List.keyfind([:wpool_process_sup], 3)
+
+    connections =
+      sup_pid
+      |> Supervisor.which_children()
+      |> Enum.map(fn {_, worker, _, _} -> connection_status(worker) end)
+
+    %{pool: pool[:pool], connection_status: connections}
+  end
+
+  # `Supervisor.which_children/1` reports `:restarting` or `:undefined` instead
+  # of a pid for a child that is not running. There is no process to query in
+  # that case, so the worker is reported as disconnected instead of crashing
+  # the whole request.
+  defp connection_status(worker) when is_pid(worker) do
+    if Sparrow.H2Worker.is_alive_connection(worker), do: :connected, else: :disconnected
+  end
+
+  defp connection_status(_not_running), do: :disconnected
+
+  # 503 only when every worker in every pool is disconnected, 200 otherwise.
+  defp get_status(pools) do
+    all_disconnected? =
+      Enum.all?(pools, fn pool_info ->
+        Enum.all?(pool_info[:connection_status], &(&1 == :disconnected))
+      end)
+
+    if all_disconnected?, do: 503, else: 200
+  end
+end
diff --git a/lib/mongoose_push_web/router.ex b/lib/mongoose_push_web/router.ex index 6ab2d43e..3675ade1 100644 --- a/lib/mongoose_push_web/router.ex +++ b/lib/mongoose_push_web/router.ex @@ -22,6 +22,7 @@ defmodule MongoosePushWeb.Router do pipe_through(:api) get("/metrics", MongoosePushWeb.PrometheusMetricsController, :send) + get("/healthcheck", MongoosePushWeb.HealthcheckController, :send) end scope "/v1", MongoosePushWeb.APIv1 do diff --git a/mix.exs b/mix.exs index 5834f6a2..a538cdd7 100644 --- a/mix.exs +++ b/mix.exs @@ -31,7 +31,7 @@ defmodule 
MongoosePush.Mixfile do defp deps do [ {:chatterbox, github: "joedevivo/chatterbox", ref: "1f4ce4f", override: true}, - {:sparrow, github: "esl/sparrow", ref: "b1896ca"}, + {:sparrow, github: "esl/sparrow", ref: "80a17bd"}, {:plug_cowboy, "~> 2.0"}, {:cowboy, "~> 2.3", override: true}, {:jason, "~> 1.0"}, diff --git a/mix.lock b/mix.lock index c1d28162..8bad7bb6 100644 --- a/mix.lock +++ b/mix.lock @@ -48,7 +48,7 @@ "pollution": {:hex, :pollution, "0.9.2", "3f67542631071c99f807d2a8f9da799c07cd983c902f5357b9e1569c20a26e76", [:mix], [], "hexpm", "6399fd8ffd97dcc3d9d277f60542a234d644d7bcc0d48c8fda93d6be4801bac2"}, "quixir": {:hex, :quixir, "0.9.3", "f01c37386b9e1d0526f01a8734a6d7884af294a0ec360f05c24c7171d74632bd", [:mix], [{:pollution, "~> 0.9.2", [hex: :pollution, repo: "hexpm", optional: false]}], "hexpm", "4f3a1fe7c82b767d935b3f7b94cf34b91ef78bb487ef256b303d77417fc7d589"}, "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"}, - "sparrow": {:git, "https://github.com/esl/sparrow.git", "b1896ca4fb0ca18369dd62de3d4c82f14f5bc66e", [ref: "b1896ca"]}, + "sparrow": {:git, "https://github.com/esl/sparrow.git", "80a17bdc9a2289229a646eb0bd26a162b39cbf83", [ref: "80a17bd"]}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.5", "6eaf7ad16cb568bb01753dbbd7a95ff8b91c7979482b95f38443fe2c8852a79b", [:make, :mix, :rebar3], [], "hexpm", "13104d7897e38ed7f044c4de953a6c28597d1c952075eb2e328bc6d6f2bfc496"}, "telemetry": {:hex, :telemetry, "0.4.1", "ae2718484892448a24470e6aa341bc847c3277bfb8d4e9289f7474d752c09c7f", [:rebar3], [], "hexpm", "4738382e36a0a9a2b6e25d67c960e40e1a2c95560b9f936d8e29de8cd858480f"}, "telemetry_metrics": {:hex, :telemetry_metrics, "0.4.2", "1de986fad9aa6bf81f8a33ddfd16e5d8ab0dec6272e624eb517c1a92a44d41a9", [:mix], [{:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", 
"e56ffed2dbe293ab6cf7c94980faeb368cb360662c1927f54fc634a4ca55362e"}, diff --git a/test/support/api.ex b/test/support/api.ex index 43cef1af..207e4e57 100644 --- a/test/support/api.ex +++ b/test/support/api.ex @@ -30,7 +30,7 @@ defmodule MongoosePush.Support.API do def get(path) do %Response{status_code: status, headers: headers, body: body} = - HTTPoison.get!("https://localhost:8443" <> path) + HTTPoison.get!("https://localhost:8443" <> path, [], hackney: [:insecure]) {status, headers, body} end
diff --git a/test/unit/healthcheck_test.exs b/test/unit/healthcheck_test.exs
new file mode 100644
index 00000000..bedf63bd
--- /dev/null
+++ b/test/unit/healthcheck_test.exs
@@ -0,0 +1,106 @@
+defmodule MongoosePushWeb.HealthcheckTest do
+  @moduledoc """
+  Checks that `GET /healthcheck` reports the status of every configured pool
+  and answers 503 only when both the FCM and APNS connections are broken.
+  """
+
+  alias MongoosePush.Support.API
+  use ExUnit.Case, async: false
+  import Mox
+
+  setup :verify_on_exit!
+
+  setup do
+    TestHelper.reload_app()
+  end
+
+  test "Successful connection to services" do
+    assert {200, _, body} = API.get("/healthcheck")
+    pools = Jason.decode!(body)
+
+    assert_pools(pools, pool_sizes(:fcm) ++ pool_sizes(:apns), "connected")
+  end
+
+  describe "Unsuccessful FCM connection" do
+    setup do
+      restart_with(fcm: broken_fcm_config())
+    end
+
+    test "is reflected in healthcheck endpoint" do
+      # APNS is still connected, so the endpoint keeps answering 200
+      assert {200, _, body} = API.get("/healthcheck")
+      pools = Jason.decode!(body)
+
+      assert_pools(pools, pool_sizes(:apns), "connected")
+      assert_pools(pools, pool_sizes(:fcm), "disconnected")
+    end
+  end
+
+  describe "Unsuccessful APNS and FCM connection" do
+    setup do
+      restart_with(fcm: broken_fcm_config(), apns: broken_apns_config())
+    end
+
+    test "is reflected in healthcheck endpoint" do
+      assert {503, _, body} = API.get("/healthcheck")
+      pools = Jason.decode!(body)
+
+      assert_pools(pools, pool_sizes(:apns), "disconnected")
+      assert_pools(pools, pool_sizes(:fcm), "disconnected")
+    end
+  end
+
+  # Returns `{pool_name, pool_size}` for every pool configured for `service`
+  # (`:fcm` or `:apns`) in the `:mongoose_push` application environment.
+  defp pool_sizes(service) do
+    :mongoose_push
+    |> Application.get_env(service)
+    |> Enum.map(fn {name, info} -> {name, Keyword.get(info, :pool_size)} end)
+  end
+
+  # Asserts that each pool in `sizes` is reported with all of its workers
+  # in the given `status` (`"connected"` or `"disconnected"`).
+  defp assert_pools(reported, sizes, status) do
+    for {pool_name, worker_count} <- sizes do
+      expected = %{
+        "pool" => Atom.to_string(pool_name),
+        "connection_status" => List.duplicate(status, worker_count)
+      }
+
+      assert expected in reported
+    end
+  end
+
+  # We simulate broken FCM connection by changing the port
+  defp broken_fcm_config do
+    update_pools(:fcm, fn pool_info ->
+      Keyword.update(pool_info, :port, 4444, &(&1 + 1))
+    end)
+  end
+
+  # We simulate broken APNS connection by changing the port (toggling `:use_2197`)
+  defp broken_apns_config do
+    update_pools(:apns, fn pool_info ->
+      Keyword.update(pool_info, :use_2197, false, &(!&1))
+    end)
+  end
+
+  # Applies `update` to the options of every pool configured for `service`.
+  defp update_pools(service, update) do
+    :mongoose_push
+    |> Application.fetch_env!(service)
+    |> Enum.map(fn {pool_name, pool_info} -> {pool_name, update.(pool_info)} end)
+  end
+
+  # Restarts `:mongoose_push` with the given `service: config` overrides applied.
+  defp restart_with(overrides) do
+    Application.stop(:mongoose_push)
+    Application.load(:mongoose_push)
+
+    Enum.each(overrides, fn {service, config} ->
+      Application.put_env(:mongoose_push, service, config)
+    end)
+
+    Application.start(:mongoose_push)
+  end
+end