diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..63e8c71
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,36 @@
+name: Build and Publish
+on:
+  release:
+    types: [published]
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Print tag number
+        run: echo Publishing to tag ${{ github.event.release.tag_name }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build and push (release tag)
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          tags: awaragi/prometheus-mssql-exporter:${{ github.event.release.tag_name }}
+      - name: Build and push (latest)
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          tags: awaragi/prometheus-mssql-exporter:latest
+      - name: Update repo description
+        uses: peter-evans/dockerhub-description@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+          repository: awaragi/prometheus-mssql-exporter
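For reference, the workflow above is roughly equivalent to the following manual steps; this is a minimal sketch, assuming `docker login` has already been run and substituting a hypothetical release tag `v1.0.0`:

```shell
# Manual equivalent of the workflow above (hypothetical tag v1.0.0)
docker build -t awaragi/prometheus-mssql-exporter:v1.0.0 -t awaragi/prometheus-mssql-exporter:latest .
docker push awaragi/prometheus-mssql-exporter:v1.0.0
docker push awaragi/prometheus-mssql-exporter:latest
```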
diff --git a/.gitignore b/.gitignore
index e599622..830ff0a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+node_modules
 *.iml
-/node_modules/
-/.idea/
+.idea
+.vscode
+.env
+package-lock.json
+yarn.lock
+yarn-error.log
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..33ed020
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,4 @@
+node_modules/*
+package-lock.json
+yarn.lock
+
diff --git a/.prettierrc b/.prettierrc
new file mode 100644
index 0000000..91a3517
--- /dev/null
+++ b/.prettierrc
@@ -0,0 +1,3 @@
+{
+  "printWidth": 160
+}
diff --git a/Dockerfile b/Dockerfile
index 9ba929c..9857549 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,6 @@
-FROM node:8.4.0-alpine
-MAINTAINER Pierre Awaragi (pierre@awaragi.com)
+FROM node:16.14.2-alpine
+LABEL MAINTAINER="Pierre Awaragi (pierre@awaragi.com), cobolbaby"
+LABEL org.opencontainers.image.authors="Pierre Awaragi (pierre@awaragi.com), cobolbaby"
 
 # Create a directory where our app will be placed
 RUN mkdir -p /usr/src/app
@@ -8,7 +9,7 @@ RUN mkdir -p /usr/src/app
 WORKDIR /usr/src/app
 
 # Copy dependency definitions
-COPY package.json *.js /usr/src/app/
+COPY package.json *.js ./
 
 # Install dependecies
 RUN npm install --production
diff --git a/README.md b/README.md
index fdf7531..a47894e 100644
--- a/README.md
+++ b/README.md
@@ -1,72 +1,124 @@
-Prometheus MSSQL Exporter Docker Container
-=============
+# Prometheus MSSQL Exporter Docker Container
 
 Prometheus exporter for Microsoft SQL Server (MSSQL). Exposes the following metrics
 
-* mssql_instance_local_time Number of seconds since epoch on local instance
-* mssql_connections{database,state} Number of active connections
-* mssql_deadlocks Number of lock requests per second that resulted in a deadlock since last restart
-* mssql_user_errors Number of user errors/sec since last restart
-* mssql_kill_connection_errors Number of kill connection errors/sec since last restart
-* mssql_database_state{database} State of each database (0=online 1=restoring 2=recovering 3=recovery pending 4=suspect 5=emergency 6=offline 7=copying 10=offline secondary)
-* mssql_log_growths{database} Total number of times the transaction log for the database has been expanded last restart
-* mssql_database_filesize{database,logicalname,type,filename} Physical sizes of files used by database in KB, their names and types (0=rows, 1=log, 2=filestream,3=n/a 4=fulltext(prior to version 2008 of MS SQL Server))
-* mssql_page_life_expectancy Indicates the minimum number of seconds a page will stay in the buffer pool on this node without references. The traditional advice from Microsoft used to be that the PLE should remain above 300 seconds
-* mssql_io_stall{database,type} Wait time (ms) of stall since last restart
-* mssql_io_stall_total{database} Wait time (ms) of stall since last restart
-* mssql_batch_requests Number of Transact-SQL command batches received per second. This statistic is affected by all constraints (such as I/O, number of users, cachesize, complexity of requests, and so on). High batch requests mean good throughput
-* mssql_page_fault_count Number of page faults since last restart
-* mssql_memory_utilization_percentage Percentage of memory utilization
-* mssql_total_physical_memory_kb Total physical memory in KB
-* mssql_available_physical_memory_kb Available physical memory in KB
-* mssql_total_page_file_kb Total page file in KB
-* mssql_available_page_file_kb Available page file in KB
+- mssql_instance_local_time Number of seconds since epoch on local instance
+- mssql_connections{database,state} Number of active connections
+- mssql_deadlocks Number of lock requests per second that resulted in a deadlock since last restart
+- mssql_user_errors Number of user errors/sec since last restart
+- mssql_kill_connection_errors Number of kill connection errors/sec since last restart
+- mssql_database_state{database} State of each database (0=online 1=restoring 2=recovering 3=recovery pending 4=suspect 5=emergency 6=offline 7=copying 10=offline secondary)
+- mssql_log_growths{database} Total number of times the transaction log for the database has been expanded since last restart
+- mssql_database_filesize{database,logicalname,type,filename} Physical sizes of files used by database in KB, their names and types (0=rows, 1=log, 2=filestream, 3=n/a, 4=fulltext (prior to version 2008 of MS SQL Server))
+- mssql_page_life_expectancy Indicates the minimum number of seconds a page will stay in the buffer pool on this node without references. The traditional advice from Microsoft used to be that the PLE should remain above 300 seconds
+- mssql_io_stall{database,type} Wait time (ms) of stall since last restart
+- mssql_io_stall_total{database} Wait time (ms) of stall since last restart
+- mssql_batch_requests Number of Transact-SQL command batches received per second. This statistic is affected by all constraints (such as I/O, number of users, cache size, complexity of requests, and so on). High batch requests mean good throughput
+- mssql_page_fault_count Number of page faults since last restart
+- mssql_memory_utilization_percentage Percentage of memory utilization
+- mssql_total_physical_memory_kb Total physical memory in KB
+- mssql_available_physical_memory_kb Available physical memory in KB
+- mssql_total_page_file_kb Total page file in KB
+- mssql_available_page_file_kb Available page file in KB
 
 Please feel free to submit other interesting metrics to include.
 
-Usage
------
+> This exporter has been tested against MSSQL 2017 and 2019 docker images (the only ones offered by Microsoft). Other versions might work but have not been tested.
+
+## Usage
 
 `docker run -e SERVER=192.168.56.101 -e USERNAME=SA -e PASSWORD=qkD4x3yy -e DEBUG=app -p 4000:4000 --name prometheus-mssql-exporter awaragi/prometheus-mssql-exporter`
 
 The image supports the following environments and exposes port 4000
 
-* **SERVER** server ip or dns name (required)
-* **PORT** server port (optional defaults to 1433)
-* **USERNAME** access user (required)
-* **PASSWORD** access password (required)
-* **DEBUG** comma delimited list of enabled logs (optional currently supports app and metrics)
+- **SERVER** server ip or dns name (required)
+- **PORT** server port (optional, defaults to 1433)
+- **USERNAME** access user (required)
+- **PASSWORD** access password (required)
+- **DEBUG** comma delimited list of enabled logs (optional, currently supports app and metrics)
 
 It is **_required_** that the specified user has the following permissions
 
-* GRANT VIEW ANY DEFINITION TO
-* GRANT VIEW SERVER STATE TO
+- GRANT VIEW ANY DEFINITION TO
+- GRANT VIEW SERVER STATE TO
+
+## Development
+
+### Launch a test mssql server
+
+To launch a local mssql instance to test against
+
+```shell
+npm run test:mssql:2019
+# or
+npm run test:mssql:2017
+```
+
+To use persistent storage, add `-v /mypath:/var/opt/mssql/data` to your version of package.json
+
+### List all available metrics
+
+```shell
+node metrics.js
+```
+
+### Environment variables
+
+- SERVER: sql server host
+- PORT: sql server port (defaults to 1433)
+- USERNAME: sql server user (admin, or a user with the required permissions)
+- PASSWORD: sql user password
+- EXPOSE: webserver port (defaults to 4000)
+- DEBUG: verbose logging
+  - app for application logging
+  - metrics for metrics execution logging
+  - db for database connection logging
+  - queries for database queries and results logging
+
+### Launch via command line
+
+#### Using NodeJS
+
+To execute the application against a locally running mssql instance (see above for how to launch a docker instance of mssql),
+use the following command, which will generate detailed logs
+
+```shell
+npm start
+```
+
+A more verbose execution, with all queries and their results printed out
+
+```shell
+npm run start:verbose
+```
+
+#### Using Docker
+
+To build and launch your docker image use the following command
 
-Development
------------
+```shell
+npm run docker:run
+```
 
-### Launch via command line
+## Testing
 
-`
-SERVER=sqlserver
-PORT=sqlport<1433>
-USERNAME=sqluser
-PASSWORD=sqluserpassword
-EXPOSE=webport<4000>
-node ./index.js
-`
+Use curl or wget to fetch the metrics from the launched web application.
 
-To enable debugging set the environment variable DEBUG to app and/or metrics (DEBUG=app)
+```shell
+curl http://localhost:4000/metrics
+```
 
-for example:
-`DEBUG=app,metrics SERVER=192.168.56.101 USERNAME=SA PASSWORD=qkD4x3yy node ./index.js`
+An E2E test is available to execute against MSSQL 2017 or 2019 docker instances.
+Any added metrics must also be added to the e2e tests.
 
-### building and pushing image to dockerhub
+## Metric listing
 
-`npm run push`
+Call the metrics.js file directly to generate documentation of available metrics and to update this README file.
+
+```shell
+node metrics.js
+```
 
-### Launch a mock mssql server
+## Building and pushing image to dockerhub
 
-`docker run -e ACCEPT_EULA=Y -e SA_PASSWORD=qkD4x3yy -p 1433:1433 --name mssql -d microsoft/mssql-server-linux`
+Use docker push or the bundled Github Workflows/Actions (see .github/workflows).
 
-To use a persistent storage include `-v /mypath:/var/opt/mssql/data`
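The two GRANT statements required under Usage can be applied with sqlcmd. A minimal sketch, assuming SQL authentication and a hypothetical login named `exporter` (the login name and password are placeholders, not part of this patch):

```shell
# Hypothetical least-privilege login for the exporter; adjust name/password
sqlcmd -S 192.168.56.101 -U SA -P qkD4x3yy -Q "CREATE LOGIN exporter WITH PASSWORD = 'StrongPassword1!'"
sqlcmd -S 192.168.56.101 -U SA -P qkD4x3yy -Q "GRANT VIEW ANY DEFINITION TO exporter; GRANT VIEW SERVER STATE TO exporter"
```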
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..bdb8e47
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,5 @@
+# TODO metrics
+
+- sql version (see https://www.mssqltips.com/sqlservertip/1140/how-to-tell-what-sql-server-version-you-are-running/ and summary metric)
+- https://github.com/rongfengliang/prometheus-mssql-exporter/commit/0fc70b747cf2bd8f2e96c58a2cb6f1fc4aae39dd
+- custom metrics? https://github.com/awaragi/prometheus-mssql-exporter/pull/17
diff --git a/index.js b/index.js
index fe2b56a..feae7c8 100644
--- a/index.js
+++ b/index.js
@@ -1,34 +1,37 @@
-const debug = require("debug")("app");
-const Connection = require('tedious').Connection;
-const Request = require('tedious').Request;
-const app = require('express')();
+const appLog = require("debug")("app");
+const dbLog = require("debug")("db");
+const queriesLog = require("debug")("queries");
 
-const client = require('./metrics').client;
-const up = require('./metrics').up;
-const metrics = require('./metrics').metrics;
+const Connection = require("tedious").Connection;
+const Request = require("tedious").Request;
+const app = require("express")();
+
+const client = require("./metrics").client;
+const mssql_up = require("./metrics").mssql_up;
+const metrics = require("./metrics").metrics;
 
 let config = {
-    connect: {
-        server: process.env["SERVER"],
-        userName: process.env["USERNAME"],
-        password: process.env["PASSWORD"],
-        options: {
-            port: process.env["PORT"] || 1433,
-            encrypt: true,
-            rowCollectionOnRequestCompletion: true
-        }
+  connect: {
+    server: process.env["SERVER"],
+    userName: process.env["USERNAME"],
+    password: process.env["PASSWORD"],
+    options: {
+      port: parseInt(process.env["PORT"]) || 1433,
+      encrypt: true,
+      rowCollectionOnRequestCompletion: true,
    },
-    port: process.env["EXPOSE"] || 4000
+  },
+  port: process.env["EXPOSE"] || 4000,
 };
 
 if (!config.connect.server) {
-    throw new Error("Missing SERVER information")
+  throw new Error("Missing SERVER information");
 }
 if (!config.connect.userName) {
-    throw new Error("Missing USERNAME information")
+  throw new Error("Missing USERNAME information");
 }
 if (!config.connect.password) {
-    throw new Error("Missing PASSWORD information")
+  throw new Error("Missing PASSWORD information");
 }
 
 /**
@@ -37,23 +40,22 @@ if (!config.connect.password) {
  * Connects to database
  *
  * @returns Promise
  */
 async function connect() {
-    return new Promise((resolve, reject) => {
-        debug("Connecting to database", config.connect.server);
-        let connection = new Connection(config.connect);
-        connection.on('connect', (error) => {
-            if (error) {
-                console.error("Failed to connect to database:", error.message || error);
-                reject(error);
-            } else {
-                debug("Connected to database");
-                resolve(connection);
-            }
-        });
-        connection.on('end', () => {
-            debug("Connection to database ended");
-        });
+  return new Promise((resolve, reject) => {
+    dbLog("Connecting to database", config.connect.server);
+    let connection = new Connection(config.connect);
+    connection.on("connect", (error) => {
+      if (error) {
+        console.error("Failed to connect to database:", error.message || error);
+        reject(error);
+      } else {
+        dbLog("Connected to database");
+        resolve(connection);
+      }
    });
-
+    connection.on("end", () => {
+      dbLog("Connection to database ended");
+    });
+  });
 }
 
 /**
@@ -65,18 +67,20 @@ async function connect() {
  * @param connection establish connection
  * @param collector metric collector
  *
  * @returns Promise of collect operation (no value returned)
  */
 async function measure(connection, collector) {
-    return new Promise((resolve) => {
-        let request = new Request(collector.query, (error, rowCount, rows) => {
-            if (!error) {
-                collector.collect(rows, collector.metrics);
-                resolve();
-            } else {
-                console.error("Error executing SQL query", collector.query, error);
-                resolve();
-            }
-        });
-        connection.execSql(request);
+  return new Promise((resolve) => {
+    queriesLog(`Executing query: ${collector.query}`);
+    let request = new Request(collector.query, (error, rowCount, rows) => {
+      if (!error) {
+        queriesLog(`Retrieved rows ${JSON.stringify(rows, null, 2)}`);
+        collector.collect(rows, collector.metrics);
+        resolve();
+      } else {
+        console.error("Error executing SQL query", collector.query, error);
+        resolve();
+      }
    });
+    connection.execSql(request);
+  });
 }
 
 /**
@@ -87,33 +91,38 @@ async function measure(connection, collector) {
  * Function that collects from an active server.
  *
  * @param connection active/valid connection
  *
  * @returns Promise of execution (no value returned)
  */
 async function collect(connection) {
-    up.set(1);
-    for (let i = 0; i < metrics.length; i++) {
-        await measure(connection, metrics[i]);
-    }
+  mssql_up.set(1);
+  for (let i = 0; i < metrics.length; i++) {
+    await measure(connection, metrics[i]);
+  }
 }
 
-app.get('/metrics', async (req, res) => {
-    res.contentType(client.register.contentType);
+app.get("/metrics", async (req, res) => {
+  res.contentType(client.register.contentType);
 
-    try {
-        let connection = await connect();
-        await collect(connection, metrics);
-        connection.close();
-        res.send(client.register.metrics());
-    } catch (error) {
-        // error connecting
-        up.set(0);
-        res.header("X-Error", error.message || error);
-        res.send(client.register.getSingleMetricAsString(up.name));
-    }
+  try {
+    appLog("Received metrics request");
+    let connection = await connect();
+    await collect(connection, metrics);
+    connection.close();
+    res.send(client.register.metrics());
+    appLog("Successfully processed metrics request");
+  } catch (error) {
+    // error connecting
+    appLog("Error handling metrics request");
+    mssql_up.set(0);
+    res.header("X-Error", error.message || error);
+    res.send(client.register.getSingleMetricAsString(mssql_up.name));
+  }
 });
 
 const server = app.listen(config.port, function () {
-    debug(`Prometheus-MSSQL Exporter listening on local port ${config.port} monitoring ${config.connect.userName}@${config.connect.server}:${config.connect.options.port}`);
+  appLog(
+    `Prometheus-MSSQL Exporter listening on local port ${config.port} monitoring ${config.connect.userName}@${config.connect.server}:${config.connect.options.port}`
+  );
 });
 
-process.on('SIGINT', function () {
-    server.close();
-    process.exit(0);
+process.on("SIGINT", function () {
+  server.close();
+  process.exit(0);
 });
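A quick way to observe the error path introduced above (mssql_up set to 0 plus an X-Error response header) is to scrape a running exporter whose SERVER is unreachable; a sketch under that assumption:

```shell
# Sketch: with the exporter pointed at an unreachable SERVER, the scrape
# should include an X-Error header and a body containing only the mssql_up metric
curl -i http://localhost:4000/metrics
```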
diff --git a/metrics.js b/metrics.js
index bd73c28..b1041e5 100644
--- a/metrics.js
+++ b/metrics.js
@@ -2,159 +2,177 @@
  * Collection of metrics and their associated SQL requests
  * Created by Pierre Awaragi
  */
-const debug = require("debug")("metrics");
-const client = require('prom-client');
+const metricsLog = require("debug")("metrics");
+const client = require("prom-client");
 
 // UP metric
-const up = new client.Gauge({name: 'up', help: "UP Status"});
+const mssql_up = new client.Gauge({ name: "mssql_up", help: "UP Status" });
 
 // Query based metrics
 // -------------------
 const mssql_instance_local_time = {
-    metrics: {
-        mssql_instance_local_time: new client.Gauge({name: 'mssql_instance_local_time', help: 'Number of seconds since epoch on local instance'})
-    },
-    query: `SELECT DATEDIFF(second, '19700101', GETUTCDATE())`,
-    collect: function (rows, metrics) {
-        const mssql_instance_local_time = rows[0][0].value;
-        debug("Fetch current time", mssql_instance_local_time);
-        metrics.mssql_instance_local_time.set(mssql_instance_local_time);
-    }
+  metrics: {
+    mssql_instance_local_time: new client.Gauge({ name: "mssql_instance_local_time", help: "Number of seconds since epoch on local instance" }),
+  },
+  query: `SELECT DATEDIFF(second, '19700101', GETUTCDATE())`,
+  collect: function (rows, metrics) {
+    const mssql_instance_local_time = rows[0][0].value;
+    metricsLog("Fetched current time", mssql_instance_local_time);
+    metrics.mssql_instance_local_time.set(mssql_instance_local_time);
+  },
 };
 
 const mssql_connections = {
-    metrics: {
-        mssql_connections: new client.Gauge({name: 'mssql_connections', help: 'Number of active connections', labelNames: ['database', 'state',]})
-    },
-    query: `SELECT DB_NAME(sP.dbid)
+  metrics: {
+    mssql_connections: new client.Gauge({ name: "mssql_connections", help: "Number of active connections", labelNames: ["database", "state"] }),
+  },
+  query: `SELECT DB_NAME(sP.dbid)
      , COUNT(sP.spid)
 FROM sys.sysprocesses sP
 GROUP BY DB_NAME(sP.dbid)`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const database = row[0].value;
-            const mssql_connections = row[1].value;
-            debug("Fetch number of connections for database", database, mssql_connections);
-            metrics.mssql_connections.set({database: database, state: 'current'}, mssql_connections);
-        }
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const database = row[0].value;
+      const mssql_connections = row[1].value;
+      metricsLog("Fetched number of connections for database", database, mssql_connections);
+      metrics.mssql_connections.set({ database: database, state: "current" }, mssql_connections);
    }
+  },
 };
 
 const mssql_deadlocks = {
-    metrics: {
-        mssql_deadlocks_per_second: new client.Gauge({name: 'mssql_deadlocks', help: 'Number of lock requests per second that resulted in a deadlock since last restart'})
-    },
-    query: `SELECT cntr_value
+  metrics: {
+    mssql_deadlocks_per_second: new client.Gauge({
+      name: "mssql_deadlocks",
+      help: "Number of lock requests per second that resulted in a deadlock since last restart",
+    }),
+  },
+  query: `SELECT cntr_value
 FROM sys.dm_os_performance_counters
 where counter_name = 'Number of Deadlocks/sec' AND instance_name = '_Total'`,
-    collect: function (rows, metrics) {
-        const mssql_deadlocks = rows[0][0].value;
-        debug("Fetch number of deadlocks/sec", mssql_deadlocks);
-        metrics.mssql_deadlocks_per_second.set(mssql_deadlocks)
-    }
+  collect: function (rows, metrics) {
+    const mssql_deadlocks = rows[0][0].value;
+    metricsLog("Fetched number of deadlocks/sec", mssql_deadlocks);
+    metrics.mssql_deadlocks_per_second.set(mssql_deadlocks);
+  },
 };
 const mssql_user_errors = {
-    metrics: {
-        mssql_user_errors: new client.Gauge({name: 'mssql_user_errors', help: 'Number of user errors/sec since last restart'})
-    },
-    query: `SELECT cntr_value
+  metrics: {
+    mssql_user_errors: new client.Gauge({ name: "mssql_user_errors", help: "Number of user errors/sec since last restart" }),
+  },
+  query: `SELECT cntr_value
 FROM sys.dm_os_performance_counters
 where counter_name = 'Errors/sec' AND instance_name = 'User Errors'`,
-    collect: function (rows, metrics) {
-        const mssql_user_errors = rows[0][0].value;
-        debug("Fetch number of user errors/sec", mssql_user_errors);
-        metrics.mssql_user_errors.set(mssql_user_errors)
-    }
+  collect: function (rows, metrics) {
+    const mssql_user_errors = rows[0][0].value;
+    metricsLog("Fetched number of user errors/sec", mssql_user_errors);
+    metrics.mssql_user_errors.set(mssql_user_errors);
+  },
 };
 
 const mssql_kill_connection_errors = {
-    metrics: {
-        mssql_kill_connection_errors: new client.Gauge({name: 'mssql_kill_connection_errors', help: 'Number of kill connection errors/sec since last restart'})
-    },
-    query: `SELECT cntr_value
+  metrics: {
+    mssql_kill_connection_errors: new client.Gauge({ name: "mssql_kill_connection_errors", help: "Number of kill connection errors/sec since last restart" }),
+  },
+  query: `SELECT cntr_value
 FROM sys.dm_os_performance_counters
 where counter_name = 'Errors/sec' AND instance_name = 'Kill Connection Errors'`,
-    collect: function (rows, metrics) {
-        const mssql_kill_connection_errors = rows[0][0].value;
-        debug("Fetch number of kill connection errors/sec", mssql_kill_connection_errors);
-        metrics.mssql_kill_connection_errors.set(mssql_kill_connection_errors)
-    }
+  collect: function (rows, metrics) {
+    const mssql_kill_connection_errors = rows[0][0].value;
+    metricsLog("Fetched number of kill connection errors/sec", mssql_kill_connection_errors);
+    metrics.mssql_kill_connection_errors.set(mssql_kill_connection_errors);
+  },
 };
 
 const mssql_database_state = {
-    metrics: {
-        mssql_database_state: new client.Gauge({name: 'mssql_database_state', help: 'Databases states: 0=ONLINE 1=RESTORING 2=RECOVERING 3=RECOVERY_PENDING 4=SUSPECT 5=EMERGENCY 6=OFFLINE 7=COPYING 10=OFFLINE_SECONDARY', labelNames: ['database']}),
-    },
-    query: `SELECT name,state FROM master.sys.databases`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const database = row[0].value;
-            const mssql_database_state = row[1].value;
-            debug("Fetch state for database", database);
-            metrics.mssql_database_state.set({database: database}, mssql_database_state);
-        }
+  metrics: {
+    mssql_database_state: new client.Gauge({
+      name: "mssql_database_state",
+      help: "Databases states: 0=ONLINE 1=RESTORING 2=RECOVERING 3=RECOVERY_PENDING 4=SUSPECT 5=EMERGENCY 6=OFFLINE 7=COPYING 10=OFFLINE_SECONDARY",
+      labelNames: ["database"],
+    }),
+  },
+  query: `SELECT name,state FROM master.sys.databases`,
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const database = row[0].value;
+      const mssql_database_state = row[1].value;
+      metricsLog("Fetched state for database", database);
+      metrics.mssql_database_state.set({ database: database }, mssql_database_state);
    }
+  },
 };
 
 const mssql_log_growths = {
-    metrics: {
-        mssql_log_growths: new client.Gauge({name: 'mssql_log_growths', help: 'Total number of times the transaction log for the database has been expanded last restart', labelNames: ['database']}),
-    },
-    query: `SELECT rtrim(instance_name),cntr_value
+  metrics: {
+    mssql_log_growths: new client.Gauge({
+      name: "mssql_log_growths",
+      help: "Total number of times the transaction log for the database has been expanded since last restart",
+      labelNames: ["database"],
+    }),
+  },
+  query: `SELECT rtrim(instance_name),cntr_value
 FROM sys.dm_os_performance_counters
 where counter_name = 'Log Growths'
 and instance_name <> '_Total'`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const database = row[0].value;
-            const mssql_log_growths = row[1].value;
-            debug("Fetch number log growths for database", database);
-            metrics.mssql_log_growths.set({database: database}, mssql_log_growths);
-        }
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const database = row[0].value;
+      const mssql_log_growths = row[1].value;
+      metricsLog("Fetched number of log growths for database", database);
+      metrics.mssql_log_growths.set({ database: database }, mssql_log_growths);
    }
+  },
 };
 const mssql_database_filesize = {
-    metrics: {
-        mssql_database_filesize: new client.Gauge({name: 'mssql_database_filesize', help: 'Physical sizes of files used by database in KB, their names and types (0=rows, 1=log, 2=filestream,3=n/a 4=fulltext(before v2008 of MSSQL))', labelNames: ['database','logicalname','type','filename']}),
-    },
-    query: `SELECT DB_NAME(database_id) AS database_name, Name AS logical_name, type, physical_name, (size * 8) size_kb FROM sys.master_files`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const database = row[0].value;
-            const logicalname = row[1].value
-            const type = row[2].value
-            const filename = row[3].value
-            const mssql_database_filesize = row[4].value;
-            debug("Fetch size of files for database ", database);
-            metrics.mssql_database_filesize.set({database: database, logicalname: logicalname, type: type, filename: filename}, mssql_database_filesize);
-        }
+  metrics: {
+    mssql_database_filesize: new client.Gauge({
+      name: "mssql_database_filesize",
+      help: "Physical sizes of files used by database in KB, their names and types (0=rows, 1=log, 2=filestream, 3=n/a, 4=fulltext (before v2008 of MSSQL))",
+      labelNames: ["database", "logicalname", "type", "filename"],
+    }),
+  },
+  query: `SELECT DB_NAME(database_id) AS database_name, Name AS logical_name, type, physical_name, (size * cast(8 as bigint)) size_kb FROM sys.master_files`,
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const database = row[0].value;
+      const logicalname = row[1].value;
+      const type = row[2].value;
+      const filename = row[3].value;
+      const mssql_database_filesize = row[4].value;
+      metricsLog("Fetched size of files for database", database);
+      metrics.mssql_database_filesize.set({ database: database, logicalname: logicalname, type: type, filename: filename }, mssql_database_filesize);
    }
+  },
 };
 
 const mssql_page_life_expectancy = {
-    metrics: {
-        mssql_page_life_expectancy: new client.Gauge({name: 'mssql_page_life_expectancy', help: 'Indicates the minimum number of seconds a page will stay in the buffer pool on this node without references. The traditional advice from Microsoft used to be that the PLE should remain above 300 seconds'})
-    },
-    query: `SELECT TOP 1 cntr_value
+  metrics: {
+    mssql_page_life_expectancy: new client.Gauge({
+      name: "mssql_page_life_expectancy",
+      help: "Indicates the minimum number of seconds a page will stay in the buffer pool on this node without references. The traditional advice from Microsoft used to be that the PLE should remain above 300 seconds",
+    }),
+  },
+  query: `SELECT TOP 1 cntr_value
 FROM sys.dm_os_performance_counters with (nolock)where counter_name='Page life expectancy'`,
-    collect: function (rows, metrics) {
-        const mssql_page_life_expectancy = rows[0][0].value;
-        debug("Fetch page life expectancy", mssql_page_life_expectancy);
-        metrics.mssql_page_life_expectancy.set(mssql_page_life_expectancy)
-    }
+  collect: function (rows, metrics) {
+    const mssql_page_life_expectancy = rows[0][0].value;
+    metricsLog("Fetched page life expectancy", mssql_page_life_expectancy);
+    metrics.mssql_page_life_expectancy.set(mssql_page_life_expectancy);
+  },
 };
 const mssql_io_stall = {
-    metrics: {
-        mssql_io_stall: new client.Gauge({name: 'mssql_io_stall', help: 'Wait time (ms) of stall since last restart', labelNames: ['database', 'type']}),
-        mssql_io_stall_total: new client.Gauge({name: 'mssql_io_stall_total', help: 'Wait time (ms) of stall since last restart', labelNames: ['database']}),
-    },
-    query: `SELECT
+  metrics: {
+    mssql_io_stall: new client.Gauge({ name: "mssql_io_stall", help: "Wait time (ms) of stall since last restart", labelNames: ["database", "type"] }),
+    mssql_io_stall_total: new client.Gauge({ name: "mssql_io_stall_total", help: "Wait time (ms) of stall since last restart", labelNames: ["database"] }),
+  },
+  query: `SELECT
 cast(DB_Name(a.database_id) as varchar) as name,
     max(io_stall_read_ms),
     max(io_stall_write_ms),
@@ -165,120 +183,133 @@ FROM
 sys.dm_io_virtual_file_stats(null, null) a
 INNER JOIN sys.master_files b ON a.database_id = b.database_id and a.file_id = b.file_id
 group by a.database_id`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const database = row[0].value;
-            const read = row[1].value;
-            const write = row[2].value;
-            const stall = row[3].value;
-            const queued_read = row[4].value;
-            const queued_write = row[5].value;
-            debug("Fetch number of stalls for database", database);
-            metrics.mssql_io_stall_total.set({database: database}, stall);
-            metrics.mssql_io_stall.set({database: database, type: "read"}, read);
-            metrics.mssql_io_stall.set({database: database, type: "write"}, write);
-            metrics.mssql_io_stall.set({database: database, type: "queued_read"}, queued_read);
-            metrics.mssql_io_stall.set({database: database, type: "queued_write"}, queued_write);
-        }
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const database = row[0].value;
+      const read = row[1].value;
+      const write = row[2].value;
+      const stall = row[3].value;
+      const queued_read = row[4].value;
+      const queued_write = row[5].value;
+      metricsLog("Fetched number of stalls for database", database);
+      metrics.mssql_io_stall_total.set({ database: database }, stall);
+      metrics.mssql_io_stall.set({ database: database, type: "read" }, read);
+      metrics.mssql_io_stall.set({ database: database, type: "write" }, write);
+      metrics.mssql_io_stall.set({ database: database, type: "queued_read" }, queued_read);
+      metrics.mssql_io_stall.set({ database: database, type: "queued_write" }, queued_write);
    }
+  },
 };
 const mssql_batch_requests = {
-    metrics: {
-        mssql_batch_requests: new client.Gauge({name: 'mssql_batch_requests', help: 'Number of Transact-SQL command batches received per second. This statistic is affected by all constraints (such as I/O, number of users, cachesize, complexity of requests, and so on). High batch requests mean good throughput'})
-    },
-    query: `SELECT TOP 1 cntr_value
+  metrics: {
+    mssql_batch_requests: new client.Gauge({
+      name: "mssql_batch_requests",
+      help: "Number of Transact-SQL command batches received per second. This statistic is affected by all constraints (such as I/O, number of users, cache size, complexity of requests, and so on). High batch requests mean good throughput",
+    }),
+  },
+  query: `SELECT TOP 1 cntr_value
 FROM sys.dm_os_performance_counters where counter_name = 'Batch Requests/sec'`,
-    collect: function (rows, metrics) {
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const mssql_batch_requests = row[0].value;
-            debug("Fetch number of batch requests per second", mssql_batch_requests);
-            metrics.mssql_batch_requests.set(mssql_batch_requests);
-        }
+  collect: function (rows, metrics) {
+    for (let i = 0; i < rows.length; i++) {
+      const row = rows[i];
+      const mssql_batch_requests = row[0].value;
+      metricsLog("Fetched number of batch requests per second", mssql_batch_requests);
+      metrics.mssql_batch_requests.set(mssql_batch_requests);
    }
+  },
 };
 
 const mssql_os_process_memory = {
-    metrics: {
-        mssql_page_fault_count: new client.Gauge({name: 'mssql_page_fault_count', help: 'Number of page faults since last restart'}),
-        mssql_memory_utilization_percentage: new client.Gauge({name: 'mssql_memory_utilization_percentage', help: 'Percentage of memory utilization'}),
-    },
-    query: `SELECT page_fault_count, memory_utilization_percentage
+  metrics: {
+    mssql_page_fault_count: new client.Gauge({ name: "mssql_page_fault_count", help: "Number of page faults since last restart" }),
+    mssql_memory_utilization_percentage: new client.Gauge({ name: "mssql_memory_utilization_percentage", help: "Percentage of memory utilization" }),
+  },
+  query: `SELECT page_fault_count, memory_utilization_percentage
 from sys.dm_os_process_memory`,
-    collect: function (rows, metrics) {
-        const page_fault_count = rows[0][0].value;
-        const memory_utilization_percentage = rows[0][1].value;
-        debug("Fetch page fault count", page_fault_count);
-        metrics.mssql_page_fault_count.set(page_fault_count);
-        metrics.mssql_memory_utilization_percentage.set(memory_utilization_percentage);
-    }
+  collect: function (rows, metrics) {
+    const page_fault_count = rows[0][0].value;
+    const memory_utilization_percentage = rows[0][1].value;
+    metricsLog("Fetched page fault count", page_fault_count);
+    metrics.mssql_page_fault_count.set(page_fault_count);
+    metrics.mssql_memory_utilization_percentage.set(memory_utilization_percentage);
+  },
 };
 
 const mssql_os_sys_memory = {
-    metrics: {
-        mssql_total_physical_memory_kb: new client.Gauge({name: 'mssql_total_physical_memory_kb', help: 'Total physical memory in KB'}),
-        mssql_available_physical_memory_kb: new client.Gauge({name: 'mssql_available_physical_memory_kb', help: 'Available physical memory in KB'}),
-        mssql_total_page_file_kb: new client.Gauge({name: 'mssql_total_page_file_kb', help: 'Total page file in KB'}),
-        mssql_available_page_file_kb: new client.Gauge({name: 'mssql_available_page_file_kb', help: 'Available page file in KB'}),
-    },
-    query: `SELECT total_physical_memory_kb, available_physical_memory_kb, total_page_file_kb, available_page_file_kb
+  metrics: {
+    mssql_total_physical_memory_kb: new client.Gauge({ name: "mssql_total_physical_memory_kb", help: "Total physical memory in KB" }),
+    mssql_available_physical_memory_kb: new client.Gauge({ name: "mssql_available_physical_memory_kb", help: "Available physical memory in KB" }),
+    mssql_total_page_file_kb: new client.Gauge({ name: "mssql_total_page_file_kb", help: "Total page file in KB" }),
+    mssql_available_page_file_kb: new client.Gauge({ name: "mssql_available_page_file_kb", help: "Available page file in KB" }),
+  },
+  query: `SELECT total_physical_memory_kb, available_physical_memory_kb, total_page_file_kb, available_page_file_kb
 from sys.dm_os_sys_memory`,
-    collect: function (rows, metrics) {
-        const mssql_total_physical_memory_kb = rows[0][0].value;
-        const mssql_available_physical_memory_kb = rows[0][1].value;
-        const mssql_total_page_file_kb = rows[0][2].value;
-        const mssql_available_page_file_kb = rows[0][3].value;
-        debug("Fetch system memory information");
-        metrics.mssql_total_physical_memory_kb.set(mssql_total_physical_memory_kb);
-        metrics.mssql_available_physical_memory_kb.set(mssql_available_physical_memory_kb);
-        metrics.mssql_total_page_file_kb.set(mssql_total_page_file_kb);
-        metrics.mssql_available_page_file_kb.set(mssql_available_page_file_kb);
-    }
+  collect: function (rows, metrics) {
+    const mssql_total_physical_memory_kb = rows[0][0].value;
+    const mssql_available_physical_memory_kb = rows[0][1].value;
+    const mssql_total_page_file_kb = rows[0][2].value;
+    const mssql_available_page_file_kb = rows[0][3].value;
+    metricsLog(
+      "Fetched system memory information",
+      "Total physical memory",
+      mssql_total_physical_memory_kb,
+      "Available physical memory",
+      mssql_available_physical_memory_kb,
+      "Total page file",
+      mssql_total_page_file_kb,
+      "Available page file",
+      mssql_available_page_file_kb
+    );
+    metrics.mssql_total_physical_memory_kb.set(mssql_total_physical_memory_kb);
+    metrics.mssql_available_physical_memory_kb.set(mssql_available_physical_memory_kb);
+    metrics.mssql_total_page_file_kb.set(mssql_total_page_file_kb);
+    metrics.mssql_available_page_file_kb.set(mssql_available_page_file_kb);
+  },
 };
 
 const metrics = [
-    mssql_instance_local_time,
-    mssql_connections,
-    mssql_deadlocks,
-    mssql_user_errors,
-    mssql_kill_connection_errors,
-    mssql_database_state,
-    mssql_log_growths,
-    mssql_database_filesize,
-    mssql_page_life_expectancy,
-    mssql_io_stall,
-    mssql_batch_requests,
-    mssql_os_process_memory,
-    mssql_os_sys_memory
+  mssql_instance_local_time,
+  mssql_connections,
+  mssql_deadlocks,
+  mssql_user_errors,
+  mssql_kill_connection_errors,
+  mssql_database_state,
+  mssql_log_growths,
+  mssql_database_filesize,
+  mssql_page_life_expectancy,
+  mssql_io_stall,
+  mssql_batch_requests,
+  mssql_os_process_memory,
+  mssql_os_sys_memory,
 ];
 
 module.exports = {
-    client: client,
-    up: up,
-    metrics: metrics,
+  client: client,
+  mssql_up,
+  metrics: metrics,
 };
 
 // DOCUMENTATION of queries and their associated metrics (targeted to DBAs)
 if (require.main === module) {
-    metrics.forEach(function (m) {
-        for(let key in m.metrics) {
-            if(m.metrics.hasOwnProperty(key)) {
-                console.log("--", m.metrics[key].name, m.metrics[key].help);
-            }
-        }
-        console.log(m.query + ";");
-        console.log("");
-    });
+  metrics.forEach(function (m) {
+    for (let key in m.metrics) {
+      if (m.metrics.hasOwnProperty(key)) {
+        console.log("--", m.metrics[key].name, m.metrics[key].help);
+      }
    }
+    console.log(m.query + ";");
+    console.log("");
+  });
 
-    console.log("/*");
-    metrics.forEach(function (m) {
-        for (let key in m.metrics) {
-            if(m.metrics.hasOwnProperty(key)) {
-                console.log("* ", m.metrics[key].name + (m.metrics[key].labelNames.length > 0 ? ( "{" + m.metrics[key].labelNames + "}") : ""), m.metrics[key].help);
-            }
-        }
-    });
-    console.log("*/");
-}
\ No newline at end of file
+  console.log("/*");
+  metrics.forEach(function (m) {
+    for (let key in m.metrics) {
+      if (m.metrics.hasOwnProperty(key)) {
+        console.log("* ", m.metrics[key].name + (m.metrics[key].labelNames.length > 0 ? "{" + m.metrics[key].labelNames + "}" : ""), m.metrics[key].help);
+      }
    }
+  });
+  console.log("*/");
+}
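For contributors exploring the custom-metrics idea noted in TODO.md: a new collector only needs to follow the `metrics`/`query`/`collect` shape used throughout metrics.js and be appended to the exported `metrics` array. A minimal sketch with a hypothetical metric name (not part of this patch):

```javascript
const client = require("prom-client");

// Hypothetical collector: seconds since the SQL Server instance last started.
// rows is the tedious row collection used above: rows[rowIndex][columnIndex].value
const mssql_uptime = {
  metrics: {
    mssql_uptime: new client.Gauge({ name: "mssql_uptime", help: "Seconds since the SQL Server instance last started" }),
  },
  query: `SELECT DATEDIFF(second, sqlserver_start_time, GETDATE()) FROM sys.dm_os_sys_info`,
  collect: function (rows, metrics) {
    metrics.mssql_uptime.set(rows[0][0].value);
  },
};
// To expose it, add mssql_uptime to the metrics array and to the e2e expectations.
```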
diff --git a/package.json b/package.json
index ea8ca5e..a4703e4 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "prometheus-mssql-exporter",
-  "version": "0.5.0",
+  "version": "1.0.0",
   "main": "index.js",
   "private": false,
   "keywords": [
@@ -23,17 +23,25 @@
     "url": "https://github.com/awaragi/prometheus-mssql-exporter/issues"
   },
   "scripts": {
-    "build": "docker build . -t awaragi/prometheus-mssql-exporter"
+    "docker:build": "docker build . -t awaragi/prometheus-mssql-exporter",
+    "docker:run": "export DOCKERID=$(docker build -q .) && docker run --name prometheus-mssql-exporter --rm -it -p 4000:4000 -e SERVER=$(docker inspect mssql | jq -r '.[].NetworkSettings.Networks.bridge.IPAddress') -e USERNAME=SA -e PASSWORD=qkD4x3yy -e DEBUG=app,db,metrics $DOCKERID && docker image rm $DOCKERID",
+    "start": "DEBUG=app,db,metrics SERVER=localhost USERNAME=SA PASSWORD=qkD4x3yy node ./index.js",
+    "start:verbose": "DEBUG=app,db,metrics,queries SERVER=localhost USERNAME=SA PASSWORD=qkD4x3yy node ./index.js",
+    "test:mssql:2019": "docker run --rm -e ACCEPT_EULA=Y -e SA_PASSWORD=qkD4x3yy -p 1433:1433 --name mssql mcr.microsoft.com/mssql/server:2019-latest",
+    "test:mssql:2017": "docker run --rm -e ACCEPT_EULA=Y -e SA_PASSWORD=qkD4x3yy -p 1433:1433 --name mssql mcr.microsoft.com/mssql/server:2017-latest",
+    "test:fetch": "curl http://localhost:4000/metrics",
+    "test": "jest test",
+    "format": "prettier --write ."
   },
   "dependencies": {
-    "debug": "^2.6.8",
+    "debug": "2.6.8",
     "express": "4.15.2",
     "prom-client": "9.1.1",
     "tedious": "2.0.0"
   },
   "devDependencies": {
-    "nodemon": "^1.11.0"
+    "jest": "27.5.1",
+    "prettier": "2.6.0",
+    "superagent": "7.1.1"
   }
 }
-
-
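Note that the `docker:run` script above assumes a running container named `mssql` (started via the `test:mssql:*` scripts) and the `jq` binary on the PATH; a quick sanity check under those assumptions:

```shell
# Both should succeed before `npm run docker:run`
docker inspect mssql --format '{{.NetworkSettings.Networks.bridge.IPAddress}}'
jq --version
```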
diff --git a/test/e2e.test.js b/test/e2e.test.js
new file mode 100644
index 0000000..476065e
--- /dev/null
+++ b/test/e2e.test.js
@@ -0,0 +1,89 @@
+const request = require("superagent");
+
+function parse(text) {
+  let lines = text.split("\n");
+  lines = lines.filter((line) => !line.startsWith("#")).filter((line) => line.length !== 0);
+  const o = {};
+  lines.forEach((line) => {
+    expect(line.indexOf(" ")).toBeGreaterThanOrEqual(0);
+    const [key, value] = line.split(" ");
+    o[key] = parseInt(value);
+  });
+  return o;
+}
+
+describe("E2E Test", function () {
+  it("Fetch all metrics and ensure that all expected are present", async function () {
+    const data = await request.get("http://localhost:4000/metrics");
+    expect(data.status).toBe(200);
+    let text = data.text;
+    const lines = parse(text);
+
+    // some specific tests
+    expect(lines.mssql_up).toBe(1);
+    expect(lines.mssql_instance_local_time).toBeGreaterThan(0);
+    expect(lines.mssql_total_physical_memory_kb).toBeGreaterThan(0);
+
+    // lets remove specific 2019 entries so we have a simple expect to execute
+    // we are going to assume that if all else is here then it is working
+    // once we have a version summary metric, we might be able to split this test into multiple ones
+    delete lines['mssql_database_filesize{database="tempdb",logicalname="tempdev2",type="0",filename="/var/opt/mssql/data/tempdb2.ndf"}'];
+    delete lines['mssql_database_filesize{database="tempdb",logicalname="tempdev3",type="0",filename="/var/opt/mssql/data/tempdb3.ndf"}'];
+    delete lines['mssql_database_filesize{database="tempdb",logicalname="tempdev4",type="0",filename="/var/opt/mssql/data/tempdb4.ndf"}'];
+
+    // bulk ensure that all expected results of a vanilla mssql server instance are here
+    expect(Object.keys(lines)).toEqual([
+      "mssql_up",
+      "mssql_instance_local_time",
+      'mssql_connections{database="master",state="current"}',
+      "mssql_deadlocks",
+      "mssql_user_errors",
+      "mssql_kill_connection_errors",
+      'mssql_database_state{database="master"}',
+      'mssql_database_state{database="tempdb"}',
+      'mssql_database_state{database="model"}',
+      'mssql_database_state{database="msdb"}',
+      'mssql_log_growths{database="tempdb"}',
+      'mssql_log_growths{database="model"}',
+      'mssql_log_growths{database="msdb"}',
+      'mssql_log_growths{database="mssqlsystemresource"}',
+      'mssql_log_growths{database="master"}',
+      'mssql_database_filesize{database="master",logicalname="master",type="0",filename="/var/opt/mssql/data/master.mdf"}',
+      'mssql_database_filesize{database="master",logicalname="mastlog",type="1",filename="/var/opt/mssql/data/mastlog.ldf"}',
+      'mssql_database_filesize{database="tempdb",logicalname="tempdev",type="0",filename="/var/opt/mssql/data/tempdb.mdf"}',
+      'mssql_database_filesize{database="tempdb",logicalname="templog",type="1",filename="/var/opt/mssql/data/templog.ldf"}',
+      'mssql_database_filesize{database="model",logicalname="modeldev",type="0",filename="/var/opt/mssql/data/model.mdf"}',
+      'mssql_database_filesize{database="model",logicalname="modellog",type="1",filename="/var/opt/mssql/data/modellog.ldf"}',
+      'mssql_database_filesize{database="msdb",logicalname="MSDBData",type="0",filename="/var/opt/mssql/data/MSDBData.mdf"}',
+      'mssql_database_filesize{database="msdb",logicalname="MSDBLog",type="1",filename="/var/opt/mssql/data/MSDBLog.ldf"}',
+      "mssql_page_life_expectancy",
+      'mssql_io_stall{database="master",type="read"}',
+      'mssql_io_stall{database="master",type="write"}',
+      'mssql_io_stall{database="master",type="queued_read"}',
+      'mssql_io_stall{database="master",type="queued_write"}',
+      'mssql_io_stall{database="tempdb",type="read"}',
+      'mssql_io_stall{database="tempdb",type="write"}',
+      'mssql_io_stall{database="tempdb",type="queued_read"}',
+      'mssql_io_stall{database="tempdb",type="queued_write"}',
+      'mssql_io_stall{database="model",type="read"}',
+      'mssql_io_stall{database="model",type="write"}',
+      'mssql_io_stall{database="model",type="queued_read"}',
+      'mssql_io_stall{database="model",type="queued_write"}',
+      'mssql_io_stall{database="msdb",type="read"}',
+      'mssql_io_stall{database="msdb",type="write"}',
+      'mssql_io_stall{database="msdb",type="queued_read"}',
+      'mssql_io_stall{database="msdb",type="queued_write"}',
+      'mssql_io_stall_total{database="master"}',
+      'mssql_io_stall_total{database="tempdb"}',
+      'mssql_io_stall_total{database="model"}',
+      'mssql_io_stall_total{database="msdb"}',
+      "mssql_batch_requests",
+      "mssql_page_fault_count",
+      "mssql_memory_utilization_percentage",
+      "mssql_total_physical_memory_kb",
+      "mssql_available_physical_memory_kb",
+      "mssql_total_page_file_kb",
+      "mssql_available_page_file_kb",
+    ]);
+  });
+});
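One possible way to run this suite locally, assuming Docker is available and using the scripts added in package.json (run each in its own terminal, or background the first two):

```shell
npm run test:mssql:2019   # terminal 1: disposable SQL Server container
npm start                 # terminal 2: exporter against localhost
npm test                  # terminal 3: this jest e2e suite
```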