diff --git a/.env-example b/.env-example index 3303e901..f09cd32c 100644 --- a/.env-example +++ b/.env-example @@ -120,6 +120,16 @@ HTTP_RPC_NODE= ## When running with Docker, this will affect the host port binding, not the binding inside the container. #METRICS_PORT=9100 +# Let the sentinel instance periodically report basic metrics to a remote server. +# Set this to false in order to disable it. +#TELEMETRY=true + +# Default telemetry server instance provided by Superfluid +#TELEMETRY_URL=https://sentinel-telemetry.x.superfluid.dev + +# Reporting interval, defaults to 12 hours +#TELEMETRY_INTERVAL=43200 + ## If set, you get notified about key events like process (re)starts, configuration changes and error conditions ## to the Slack channel the hook belongs to. #SLACK_WEBHOOK_URL= diff --git a/Dockerfile b/Dockerfile index 366d70b0..0190418d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ COPY ["package.json", "package-lock.json*", "./"] RUN npm ci --only=production COPY . /app -RUN mkdir data +# make sure we can write the data directory RUN chown node:node data # Add a simple init system so that Node would respect process signals diff --git a/README.md b/README.md index 5483adce..6d1b6a16 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ If all is well, you may want to set the service to autostart: systemctl enable superfluid-sentinel.service ``` -### Monitoring & Alerting +### Monitoring, Alerting & Telemetry The sentinel can provide monitoring information. In the default configuration, this is available on port 9100 and json formatted. @@ -98,6 +98,18 @@ In order to set up notifications, see `.env-example` for the relevant configurat The notification system is modular. If you want support for more channels, consider adding it. See `src/services/slackNotifier.js` for a blueprint. PRs are welcome! +Sentinel instances also periodically (default: every 12 hours) report basic metrics to a telemetry endpoint. 
+This helps to understand how many instances are active and what their approximate configuration is. +Reported metrics: +* uuid (randomly generated on first start and preserved in a file "data/uuid.txt") +* chain id +* nodejs version +* sentinel version +* healthy flag (false e.g. if the configured RPC is drifting) +* nr of rpc requests (since last restart) +* account balance (rounded to 3 decimal places) +* memory used by the process + #### Run multiple instances In order to run sentinels for multiple networks in parallel, create network specific env files which are diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/package-lock.json b/package-lock.json index 1a267b10..01b5ecb0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,6 +28,7 @@ "prom-client": "^14.0.1", "sequelize": "^6.12.5", "sqlite3": "^5.0.2", + "uuid": "^9.0.1", "web3": "1.6.1", "winston": "^3.3.3" }, @@ -2324,14 +2325,6 @@ "node": ">=8" } }, - "node_modules/@truffle/contract/node_modules/uuid": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", - "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", - "bin": { - "uuid": "dist/bin/uuid" - } - }, "node_modules/@truffle/contract/node_modules/web3": { "version": "1.8.2", "resolved": "https://registry.npmjs.org/web3/-/web3-1.8.2.tgz", @@ -2874,14 +2867,6 @@ "node": ">=8" } }, - "node_modules/@truffle/interface-adapter/node_modules/uuid": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", - "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", - "bin": { - "uuid": "dist/bin/uuid" - } - }, "node_modules/@truffle/interface-adapter/node_modules/web3": { "version": "1.8.2", "resolved": "https://registry.npmjs.org/web3/-/web3-1.8.2.tgz", @@ -5194,6 +5179,14 @@ "uuid": "^8.3.2" } }, + 
"node_modules/ethereumjs-wallet/node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/ethers": { "version": "4.0.49", "resolved": "https://registry.npmjs.org/ethers/-/ethers-4.0.49.tgz", @@ -18031,6 +18024,14 @@ "node": ">=10" } }, + "node_modules/sequelize/node_modules/uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/serialize-javascript": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", @@ -19275,9 +19276,13 @@ } }, "node_modules/uuid": { - "version": "8.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", - "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==", + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], "bin": { "uuid": "dist/bin/uuid" } @@ -22510,11 +22515,6 @@ } } }, - "uuid": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", - "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==" - }, "web3": { "version": "1.8.2", "resolved": "https://registry.npmjs.org/web3/-/web3-1.8.2.tgz", @@ -22947,11 +22947,6 @@ } } }, - "uuid": { - "version": "9.0.0", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", - "integrity": 
"sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==" - }, "web3": { "version": "1.8.2", "resolved": "https://registry.npmjs.org/web3/-/web3-1.8.2.tgz", @@ -24876,6 +24871,13 @@ "scrypt-js": "^3.0.1", "utf8": "^3.0.0", "uuid": "^8.3.2" + }, + "dependencies": { + "uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" + } } }, "ethers": { @@ -34681,6 +34683,11 @@ "requires": { "lru-cache": "^6.0.0" } + }, + "uuid": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", + "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" } } }, @@ -35664,9 +35671,9 @@ "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==" }, "uuid": { - "version": "8.3.2", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", - "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==" + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==" }, "validate-npm-package-license": { "version": "3.0.4", diff --git a/package.json b/package.json index d2e730da..86e05ebf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "superfluid-sentinel", - "version": "0.10.0", + "version": "0.11.0", "description": "Superfluid Sentinel", "main": "main.js", "scripts": { @@ -43,6 +43,7 @@ "prom-client": "^14.0.1", "sequelize": "^6.12.5", "sqlite3": "^5.0.2", + "uuid": "^9.0.1", "web3": "1.6.1", "winston": "^3.3.3" }, diff --git a/src/app.js b/src/app.js index 1644416a..c8109733 100644 --- a/src/app.js +++ b/src/app.js @@ -20,6 +20,7 @@ const Notifier = 
require("./services/notifier"); const SlackNotifier = require("./services/slackNotifier"); const TelegramNotifier = require("./services/telegramNotifier"); const NotifierJobs = require("./services/notificationJobs"); +const Telemetry = require("./services/telemetry"); const Errors = require("./utils/errors/errors"); const { wad4human } = require("@decentral.ee/web3-helpers"); @@ -61,6 +62,7 @@ class App { this.healthReport = new Report(this); this.server = new HTTPServer(this); + this.telemetry = new Telemetry(this); this.timer = new Timer(); this.notifier = new Notifier(this); @@ -141,6 +143,8 @@ class App { counter--; } } + this.logger.info(`app.shutdown() - clear interval`); + clearInterval(this._telemetryIntervalId); this.logger.info(`app.shutdown() - closing database`); await this.db.close(); } catch (err) { @@ -233,6 +237,11 @@ class App { if (this.config.METRICS === true) { this.timer.startAfter(this.server); } + // start reporting services with the configured interval. + if(this.config.TELEMETRY) { + this.logger.info(`Starting telemetry job with interval ${this.config.TELEMETRY_INTERVAL}`); + this._telemetryIntervalId = this.timer.triggerInterval(() => this.telemetry.start(), this.config.TELEMETRY_INTERVAL); + } // Only start notification jobs if notifier is enabled if (this.notificationJobs) { this.logger.info(`Starting notification jobs`); diff --git a/src/config/configuration.js b/src/config/configuration.js index b4df2069..18096bff 100644 --- a/src/config/configuration.js +++ b/src/config/configuration.js @@ -92,6 +92,9 @@ class Config { this.SLACK_WEBHOOK_URL = process.env.SLACK_WEBHOOK_URL; this.TELEGRAM_BOT_TOKEN = process.env.TELEGRAM_BOT_TOKEN; this.TELEGRAM_CHAT_ID = process.env.TELEGRAM_CHAT_ID; + this.TELEMETRY = this._parseToBool(process.env.TELEMETRY, true); + this.TELEMETRY_URL = process.env.TELEMETRY_URL || "https://sentinel-telemetry.x.superfluid.dev"; + this.TELEMETRY_INTERVAL = process.env.TELEMETRY_INTERVAL * 1000 || 43200000; // 
defaults to 12 hours // extra options: undoc and excluded from cmdline parser. Use .env file to change the defaults. this.CONCURRENCY = process.env.CONCURRENCY || 1; @@ -192,7 +195,7 @@ class Config { MAX_TX_NUMBER: this.MAX_TX_NUMBER, SLACK_WEBHOOK_URL: this.SLACK_WEBHOOK_URL, TELEGRAM_BOT_TOKEN: this.TELEGRAM_BOT_TOKEN, - TELEGRAM_CHAT_ID: this.TELEGRAM_CHAT_ID + TELEGRAM_CHAT_ID: this.TELEGRAM_CHAT_ID, }; } } diff --git a/src/config/loadCmdArgs.js b/src/config/loadCmdArgs.js index fbcac46f..dd0a72fa 100644 --- a/src/config/loadCmdArgs.js +++ b/src/config/loadCmdArgs.js @@ -26,6 +26,7 @@ program .option("--pic [value]", "PIC Address (default: not set)") .option("--observer", "Set sentinel to observer (default: not set)") .option("--no-fastsync", "Don't use fastsync feature (default: not set)") + .option("--no-telemetry", "Don't use telemetry feature (default: not set)") .action(function (args) { if (args.httpRpcNode !== undefined) { process.env.HTTP_RPC_NODE = args.httpRpcNode; @@ -72,5 +73,8 @@ program if(args.fastsync === false) { // the prefix no- is treated differently process.env.FASTSYNC = "false"; } + if(args.telemetry === false) { // the prefix no- is treated differently + process.env.TELEMETRY = "false"; + } }); program.parse(process.argv); diff --git a/src/httpserver/report.js b/src/httpserver/report.js index 4b98a4be..2c9a4419 100644 --- a/src/httpserver/report.js +++ b/src/httpserver/report.js @@ -24,7 +24,7 @@ class Report { this.app.logger.error(`report.fullReport() - web3.eth.isSyncing failed: ${err}`); } } - + const rpcProvider = (new URL(this.app.config.HTTP_RPC_NODE)).hostname; const databaseOk = await this.checkDatabase(); const estimationQueueSize = this.app.queues.getEstimationQueueLength(); const agreementQueueSize = this.app.queues.getAgreementQueueLength(); @@ -45,6 +45,7 @@ class Report { network: { chainId: await this.app.client.getChainId(), rpc: { + rpcProvider: rpcProvider, totalRequests: this.app.client.getTotalRequests(), isSyncing: 
rpcIsSyncing, lastTimeNewBlocks: lastTimeNewBlocks,
diff --git a/src/services/telemetry.js b/src/services/telemetry.js
new file mode 100644
index 00000000..f0697337
--- /dev/null
+++ b/src/services/telemetry.js
@@ -0,0 +1,108 @@
+const axios = require("axios");
+const { v4: uuidv4 } = require('uuid');
+const fs = require("fs");
+const appVersion = require("../../package").version;
+
+// File the instance uuid is read from, and written to when it has to be newly generated.
+const UUID_FILE = "data/uuid.txt";
+
+// Implements functionality for creating prometheus formatted reports and sending them to a telemetry endpoint.
+class Telemetry {
+
+  // app: the application object; this class uses app.config, app.logger, app.healthReport and app._isShutdown.
+  constructor(app) {
+    this.app = app;
+    this._isShutdown = false;
+    this.uuid = undefined;
+  }
+
+  // Sends one telemetry report via HTTP POST to config.TELEMETRY_URL. The instance uuid is loaded from
+  // UUID_FILE on first use, or generated and written there if the file can't be read.
+  // Resolves to { error, msg }: msg is the axios response on success, error is set if no URL is configured
+  // or if anything failed (failures are logged, not thrown). Resolves to undefined if the app is shutting down.
+  async start() {
+    try {
+      if (this.app._isShutdown) {
+        this._isShutdown = true;
+        this.app.logger.info(`app.shutdown() - closing telemetry`);
+        return;
+      }
+
+      // Read persisted uuid or create new one if it doesn't exist.
+      if (this.uuid === undefined) {
+        this.app.logger.debug("trying to load uuid from file");
+        try {
+          // trim: stray whitespace (e.g. a trailing newline) would otherwise end up inside the metric labels
+          this.uuid = fs.readFileSync(UUID_FILE, "utf8").trim();
+          this.app.logger.debug(`loaded uuid: ${this.uuid}`);
+        } catch (err) {
+          this.app.logger.debug("uuid.txt not found, creating new uuid");
+          this.uuid = uuidv4();
+          fs.writeFileSync(UUID_FILE, this.uuid);
+          this.app.logger.info(`created new uuid: ${this.uuid}`);
+        }
+      }
+
+      if(this.app.config.TELEMETRY_URL) {
+        const reportData = this.createReport(await this.app.healthReport.fullReport());
+        this.app.logger.info(`sending data to telemetry with uuid ${this.uuid}`);
+        const resp = await axios({
+          method: 'post',
+          url: this.app.config.TELEMETRY_URL,
+          data: reportData,
+          headers: { 'Content-Type': 'text/plain' },
+        });
+
+        return {
+          error: undefined,
+          msg: resp
+        }
+      } else {
+        return {
+          error: new Error("Telemetry.start() - no endpoint to send data"),
+          msg: undefined
+        }
+      }
+    } catch(err) {
+      this.app.logger.error(`Telemetry.start() - ${err}`);
+      return {
+        error: err,
+        msg: undefined
+      };
+    }
+  }
+
+  // returns a telemetry report (as string), reusing data from a provided health report.
+  // Every sample line is newline-terminated, so the optional balance section can't run into the next metric.
+  // TODO: add info about pic config, nr of observed tokens, estimation points, nr of upcoming liquidations, nr of DB queries
+  createReport(healthReport) {
+    // only include labels which don't have high cardinality
+    const labels = `app_uuid="${this.uuid}",chain_id="${healthReport.network.chainId}",app_version="${appVersion}",node_version="${process.version}"`;
+
+    return `
+# HELP sentinel_telemetry_uptime Total uptime of the application in seconds.
+# TYPE sentinel_telemetry_uptime gauge
+sentinel_telemetry_uptime{${labels}} ${healthReport.process.uptime}
+
+# HELP sentinel_telemetry_healthy Health status of the application, 1 for healthy and 0 for unhealthy.
+# TYPE sentinel_telemetry_healthy gauge
+sentinel_telemetry_healthy{${labels}} ${healthReport.healthy ? 1 : 0}
+
+# HELP sentinel_telemetry_rpc_requests Total number of RPC requests made since last restart.
+# TYPE sentinel_telemetry_rpc_requests counter
+sentinel_telemetry_rpc_requests{${labels}} ${healthReport.network.rpc.totalRequests}
+`
++ (healthReport.account.balance ? // undefined in observer mode
+`# HELP sentinel_telemetry_account_balance Balance of the monitored account, rounded to 3 decimal places.
+# TYPE sentinel_telemetry_account_balance gauge
+sentinel_telemetry_account_balance{${labels}} ${Math.floor(parseInt(healthReport.account.balance) / 1e15) / 1e3}
+` : "")
++
+`# HELP sentinel_telemetry_memory_used Amount of memory used by the process in bytes, as reported by process.memoryUsage().heapUsed
+# TYPE sentinel_telemetry_memory_used gauge
+sentinel_telemetry_memory_used{${labels}} ${process.memoryUsage().heapUsed}
+`;
+  }
+}
+
+module.exports = Telemetry;
\ No newline at end of file
diff --git a/src/utils/timer.js b/src/utils/timer.js
index 6b51ec59..2cd46bf0 100644
--- a/src/utils/timer.js
+++ b/src/utils/timer.js
@@ -9,6 +9,14 @@ class Timer {
     return fn.start();
   }
 
+  // Calls fn every `time` milliseconds and returns the handle created by setInterval().
+  // Deliberately not async: the handle is returned synchronously so that callers can pass it
+  // straight to clearInterval(). An async method would hand back a Promise instead, and
+  // clearInterval() on a Promise does not stop the interval.
+  triggerInterval(fn, time) {
+    return setInterval(fn, time);
+  }
+
   async startAfter(fn, data, ms= 1000) {
     setTimeout(() => fn.start(data), 1000);
   }