Skip to content

Commit

Permalink
Monitoring: add service URL manager (#3134)
Browse files Browse the repository at this point in the history
* Support es6 build

* Refactor: update events make the error optional and support log with color

* - Refactor
  - sendGetRequest to sendRequest and use fetch instead of axios.
  - replace serviceUrl and serviceName with getters
  - add update method that uses URL setter
  - Make all IServiceBase classes take an optional options on their contractor, add error handling for side effect
  -  Grouped all RMB properties to RMB to RMBProps
- Feat:
 - add stats monitor class that monitors stats service
 - add activation monitor class  that monitor activation service

* Feat: Add serviceURLManager
it is class managing service URLs with liveness checking <pick the first reachable endpoint for each service>

- Define retries and silent mode settings
- Implement constructor to handle StackManagerOptions
- Add private handleErrorsOnSilentMode method for error handling based on silent mode
- Implement getAvailableStack method to find reachable service URL
- Implement getAvailableServicesStack method to fetch and store available service URLs for all services

* Revert: playground/config.js

* Refactor: remove sesstion from RMBOptions it will be handled internally

* Refactor: update service monitor example

* Refactor: Rename service url manager options

* Refactor: make tfchainMonitor consotractor takes an optional param

* Example: add serviceURlManager Example

* Fix: remove error on getting Url before intialization and make it return undefined or emptystring
remove debug lines

* Chore: RMB handle error event

* Example: add template type to support silent

* Chore:
- change naming
- remove error on get url and return empty string in case the url is not set yet

* Chore:
- add errror handling in isAlive, will throw in case the service url is not set

* chore: export new services

* Chore: make tfchain url optional

* chore: change stats url to api/summary

* chore: add checks to prevent accessing undefined functions in RMB connect and disconnect

* chore: validate rmb connectivity though gridproxy health endpoint

* chore: add error handling to isAlive

* chore: support timeout, and log errors using events

* chore: support timeout

* refactor: support increasing timeout

* fix: return status object instade of throwing error

* refactor:
- rename constractor param
- use replace instade of split

* Refactor: use /health endpoint in tfchain monitor

* refactor: remove logs line

* feat: isAlive can take new url

* Feat: ping url in parallel

* Docs: update example

* Refactor: retry logic moved to all stacks, to avoid delay

* refactor: enahnce logs

* docs: update urls in example

* Chore: remove disconnect logic

* chore: Remove IDisconnectHandler interface

* Chore: update monitoring example

* chore: Update the service url on success

* Refactor: remove update function and replace it with setter

* Refactor: add ServiceBase class

* chore: predefine the timeout error
  • Loading branch information
0oM4R authored Aug 11, 2024
1 parent cddbd85 commit 457da79
Show file tree
Hide file tree
Showing 20 changed files with 427 additions and 161 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { GraphQLMonitor, GridProxyMonitor, RMBMonitor, ServiceMonitor, TFChainMonitor } from "../src/";
import { GraphQLMonitor, GridProxyMonitor, RMBMonitor, ServiceMonitor, TFChainMonitor } from "../src";
async function HealthCheck() {
try {
const services = [
new GridProxyMonitor("<FakeURL>"),
new GraphQLMonitor("https://graphql.dev.grid.tf/graphql"),
new TFChainMonitor("wss://tfchain.dev.grid.tf/ws"),
new RMBMonitor("wss://relay.dev.grid.tf", "wss://tfchain.dev.grid.tf/ws", "mnemonic", "sr25519"),
new RMBMonitor("wss://relay.dev.grid.tf"),
];
const serviceMonitor = new ServiceMonitor(services);

Expand All @@ -16,7 +16,7 @@ async function HealthCheck() {
serviceMonitor.interval = 0.25;
const monitor = serviceMonitor.monitorService();
await new Promise(resolve => setTimeout(resolve, 0.5 * 60 * 1000));
await monitor.exitAndDisconnect();
await monitor.exit();
process.exit(0);
} catch (err) {
console.log(err);
Expand Down
38 changes: 38 additions & 0 deletions packages/monitoring/example/serviceURLManager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import {
GraphQLMonitor,
GridProxyMonitor,
RMBMonitor,
ServiceUrlManager,
TFChainMonitor,
URLManagerOptions,
} from "../src";

const gridproxy = ["https://gridproxy.dev.grid.tf/"];
const rmb = ["https://graphql.dev.grid.tf", "wss://relay.dev.grid.tf", "wss://relay.02.dev.grid.tf"];
const graphql = ["https://graphql.dev.grid.tf/graphql"];
const tfChain = ["wss://tfchain.dev.grid.tf/ws"];

async function checkStacksAvailability<N extends boolean>(services: URLManagerOptions<N>) {
try {
const pickedUrls = await new ServiceUrlManager(services).getAvailableServicesStack();
console.log(pickedUrls);
process.exit(0);
} catch (err) {
console.log(err);
}
}

checkStacksAvailability({
retries: 3,
silent: true,
services: [
{ service: new GridProxyMonitor(), URLs: gridproxy },
{ service: new GraphQLMonitor(), URLs: graphql },
{
service: new RMBMonitor(),
URLs: rmb,
},
{ service: new TFChainMonitor(), URLs: tfChain },
],
timeout: 2,
});
21 changes: 15 additions & 6 deletions packages/monitoring/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,21 @@
"version": "2.5.0",
"description": "Threefold monitoring package",
"license": "Apache-2.0",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"example": "yarn run ts-node --project tsconfig.json example/index.ts",
"build": "tsc",
"main": "./dist/node/index.js",
"module": "./dist/es6/index.js",
"exports": {
"require": "./dist/node/index.js",
"import": "./dist/es6/index.js"
},
"types": "dist/es6/index.d.ts",
"files": [
"/dist"
],
"scripts": {
"example": "yarn run ts-node --project tsconfig-node.json",
"build": "npm-run-all es6-build node-build",
"node-build": "tsc --build tsconfig-node.json",
"es6-build": "tsc --build tsconfig-es6.json",
"test": "jest "
},
"author": "Omar Kassem",
Expand All @@ -19,7 +29,6 @@
"@threefold/rmb_direct_client": "2.5.0",
"@threefold/tfchain_client": "2.5.0",
"@threefold/types": "2.5.0",
"axios": "^0.27.2",
"chalk": "4.1.2",
"ts-node": "^10.9.1",
"typescript": "^5.3.3"
Expand Down
14 changes: 9 additions & 5 deletions packages/monitoring/src/helpers/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,25 @@ class MonitorEventEmitter extends EventEmitter {
this.addListener(MonitorEvents.storeStatus, this.addToServiceSummary);
this.addListener(MonitorEvents.summarize, this.printStatusSummary);
}
public log(message: string) {
this.emit("MonitorLog", message);
public log(message: string, color?: string) {
this.emit("MonitorLog", message, color);
}
public summarize() {
this.emit("MonitorSummarize");
}
public storeStatus(serviceName: string, isAlive: boolean) {
this.emit("MonitorStoreStatus", serviceName, isAlive);
}
public serviceDown(serviceName: string, error: Error) {
public serviceDown(serviceName: string, error?: Error) {
this.emit("MonitorServiceDown", serviceName, error);
}

private monitorLogsHandler(msg) {
console.log(msg);
private monitorLogsHandler(msg: unknown, color?: string) {
if (color && chalk[color]) {
console.log(chalk[color](msg));
} else {
console.log(msg);
}
}
private serviceDownHandler(serviceName: string, error: Error) {
console.log(`${chalk.red.bold(serviceName + " is Down")}`);
Expand Down
14 changes: 6 additions & 8 deletions packages/monitoring/src/helpers/utils.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import { RequestError } from "@threefold/types";
import axios, { AxiosError, AxiosRequestConfig } from "axios";
import { ServiceStatus } from "src/types";

export async function sendGetRequest(url: string, options: AxiosRequestConfig = {}) {
import { ServiceStatus } from "../types";

export async function sendRequest(url: string, options: RequestInit) {
try {
return await axios.get(url, options);
const res = await fetch(url, options);
if (!res?.ok) throw Error(`HTTP Response Code: ${res?.status}`);
} catch (e) {
const { response } = e as AxiosError;
const errorMessage = (response?.data as { error: string })?.error || (e as Error).message;

throw new RequestError(`HTTP request failed ${errorMessage ? "due to " + errorMessage : ""}.`);
throw new RequestError(`HTTP request failed due to ${e}.`);
}
}

Expand Down
1 change: 1 addition & 0 deletions packages/monitoring/src/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export * from "./serviceMonitor/index";
export * from "./types";
14 changes: 14 additions & 0 deletions packages/monitoring/src/serviceMonitor/activations.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { resolveServiceStatus, sendRequest } from "../helpers/utils";
import { ILivenessChecker, ServiceStatus } from "../types";
import { ServiceBase } from "./serviceBase";

export class ActivationMonitor extends ServiceBase implements ILivenessChecker {
constructor(ServiceUrl?: string) {
super("Activation");
if (ServiceUrl) this.url = ServiceUrl;
}
async isAlive(url = this.url): Promise<ServiceStatus> {
if (!url) throw new Error("Can't access before initialization");
return resolveServiceStatus(sendRequest(url, { method: "Get" }));
}
}
38 changes: 12 additions & 26 deletions packages/monitoring/src/serviceMonitor/alivenessChecker.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { monitorEvents } from "../helpers/events";
import { IDisconnectHandler, ILivenessChecker } from "../types";
import { ILivenessChecker } from "../types";

/**
* Represents a service monitor that periodically checks the liveness of multiple services.
Expand All @@ -23,57 +23,43 @@ export class ServiceMonitor {
for (let retryCount = 1; retryCount <= this.retries; retryCount++) {
const { alive, error } = await service.isAlive();
if (alive) {
monitorEvents.storeStatus(service.serviceName(), alive);
monitorEvents.storeStatus(service.name, alive);
break;
}
if (retryCount < this.retries) {
monitorEvents.log(`${service.serviceName()} seems to be down; Retrying (${retryCount}/${this.retries})...`);
monitorEvents.log(`${service.name} seems to be down; Retrying (${retryCount}/${this.retries})...`);
await new Promise(resolve => setTimeout(resolve, this.retryInterval * 60));
} else monitorEvents.serviceDown(service.serviceName(), error);
} else monitorEvents.serviceDown(service.name, error);
}
}
monitorEvents.summarize();
}

/**
* Disconnects services that implement the `IDisconnectHandler` interface.
* @returns A promise that resolves when all services are disconnected.
* Monitors the services at a regular interval and returns a function to exit the monitoring.
* @returns An object with a function `exit` to stop the monitoring services.
*/
public async disconnect(): Promise<void> {
for (const service of this.services) {
if ("disconnect" in service) {
await (service as IDisconnectHandler).disconnect();
}
}
}

/**
* Monitors the services at a regular interval and returns a function to exit and disconnect the monitoring.
* @returns An object with a function `exitAndDisconnect` to stop the monitoring and disconnect services.
*/
public monitorService(): { exitAndDisconnect: () => Promise<void> } {
public monitorService(): { exit: () => Promise<void> } {
if (this.services.length === 0) throw new Error("No services to monitor");

monitorEvents.log(`Checking services status...`);
this.checkLivenessOnce();
const intervalId = setInterval(async () => await this.checkLivenessOnce(), this.interval * 60 * 1000);

/**
* Stops the monitoring and disconnects the services.
* @returns A promise that resolves when the monitoring is stopped and services are disconnected.
* Stops the monitoring services.
* @returns A promise that resolves when the monitoring is stopped.
*/
const exitAndDisconnect = async (): Promise<void> => {
const exit = async (): Promise<void> => {
clearInterval(intervalId);
await this.disconnect();
};
return { exitAndDisconnect };
return { exit };
}

/**
* Checks the liveness of each service once and disconnects the services.
* Checks the liveness of each service once.
*/
public async pingService(): Promise<void> {
await this.checkLivenessOnce();
await this.disconnect();
}
}
32 changes: 17 additions & 15 deletions packages/monitoring/src/serviceMonitor/graphql.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import { resolveServiceStatus, sendGetRequest } from "../helpers/utils";
import { resolveServiceStatus, sendRequest } from "../helpers/utils";
import { ILivenessChecker, ServiceStatus } from "../types";
import { ServiceBase } from "./serviceBase";

export class GraphQLMonitor implements ILivenessChecker {
private readonly name = "GraphQl";
private readonly url: string;
constructor(graphQlUrl: string) {
this.url = graphQlUrl;
export class GraphQLMonitor extends ServiceBase implements ILivenessChecker {
constructor(ServiceUrl?: string) {
super("GraphQl");
if (ServiceUrl) this.url = ServiceUrl;
}
serviceName() {
return this.name;
}
serviceUrl() {
return this.url;
}

async isAlive(): Promise<ServiceStatus> {
return resolveServiceStatus(sendGetRequest(this.url));
async isAlive(url = this.url): Promise<ServiceStatus> {
if (!url) throw new Error("Can't access before initialization");
return resolveServiceStatus(
sendRequest(url, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
query: "query monitoring{__typename}",
}),
}),
);
}
}
25 changes: 10 additions & 15 deletions packages/monitoring/src/serviceMonitor/gridproxy.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import { resolveServiceStatus, sendGetRequest } from "../helpers/utils";
import { resolveServiceStatus, sendRequest } from "../helpers/utils";
import { ILivenessChecker, ServiceStatus } from "../types";
import { ServiceBase } from "./serviceBase";

export class GridProxyMonitor implements ILivenessChecker {
private readonly name = "GridProxy";
private url: string;
constructor(gridProxyUrl: string) {
this.url = gridProxyUrl;
}
serviceName() {
return this.name;
}
serviceUrl() {
return this.url;
}
async isAlive(): Promise<ServiceStatus> {
return resolveServiceStatus(sendGetRequest(this.url));
export class GridProxyMonitor extends ServiceBase implements ILivenessChecker {
constructor(ServiceUrl?: string) {
super("GridProxy");
if (ServiceUrl) this.url = ServiceUrl;
}
async isAlive(url = this.url): Promise<ServiceStatus> {
if (!url) throw new Error("Can't access before initialization");
return resolveServiceStatus(sendRequest(url, { method: "Get" }));
}
}
3 changes: 3 additions & 0 deletions packages/monitoring/src/serviceMonitor/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ export { TFChainMonitor } from "./tfChain";
export { RMBMonitor } from "./rmb";
export { ServiceMonitor } from "./alivenessChecker";
export { GraphQLMonitor } from "./graphql";
export { StatsMonitor } from "./stats";
export { ActivationMonitor } from "./activations";
export { ServiceUrlManager } from "./serviceURLManager";
54 changes: 27 additions & 27 deletions packages/monitoring/src/serviceMonitor/rmb.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
import { KeypairType } from "@polkadot/util-crypto/types";
import { Client as RMBClient } from "@threefold/rmb_direct_client";
import { RequestError } from "@threefold/types";

import { generateString, resolveServiceStatus } from "../helpers/utils";
import { IDisconnectHandler, ILivenessChecker, ServiceStatus } from "../types";

export class RMBMonitor implements ILivenessChecker, IDisconnectHandler {
private name = "RMB";
private url: string;
private rmbClient: RMBClient;
constructor(relayUrl: string, chainUrl: string, mnemonic: string, keypairType: KeypairType) {
this.url = relayUrl;
this.rmbClient = new RMBClient(chainUrl, relayUrl, mnemonic, generateString(10), keypairType, 0);
}
private async setUp() {
await this.rmbClient.connect();
}
public serviceName() {
return this.name;
}
public serviceUrl() {
return this.url;
}
public async isAlive(): Promise<ServiceStatus> {
if (!this.rmbClient?.con?.OPEN) await this.setUp();
return resolveServiceStatus(this.rmbClient.ping(2));
import { ILivenessChecker, ServiceStatus } from "../types";
import { ServiceBase } from "./serviceBase";
export class RMBMonitor extends ServiceBase implements ILivenessChecker {
constructor(ServiceUrl?: string) {
super("RMB");
if (ServiceUrl) this.url = ServiceUrl;
}
public async disconnect() {
await this.rmbClient.disconnect();
async isAlive(url = this.url): Promise<ServiceStatus> {
if (!url) throw new Error("Can't access before initialization");
const proxyUrl = url.replace("wss://relay", "https://gridproxy");
try {
const res = await fetch(proxyUrl + "/health");
if (!res?.ok) throw Error(`HTTP Response Code: ${res?.status}`);
const rmb_conn = (await res.json())?.rmb_conn;
if (rmb_conn === "ok") {
return {
alive: true,
};
} else {
return {
alive: false,
error: new Error(`rmb_conn is ${rmb_conn}`),
};
}
} catch (e) {
throw new RequestError(`HTTP request failed due to ${e}.`);
}
}
}
15 changes: 15 additions & 0 deletions packages/monitoring/src/serviceMonitor/serviceBase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { IServiceBase } from "../types";

export class ServiceBase implements IServiceBase {
private _url?: string;
constructor(private readonly _name: string) {}
public get name() {
return this._name;
}
public get url() {
return this._url ?? "";
}
public set url(url: string) {
this._url = url;
}
}
Loading

0 comments on commit 457da79

Please sign in to comment.