Skip to content

Commit

Permalink
[SENTINEL] Add periodic telemetry reporting (#186)
Browse files Browse the repository at this point in the history
  • Loading branch information
ngmachado authored Oct 3, 2023
1 parent f1b1a99 commit 300434f
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 37 deletions.
10 changes: 10 additions & 0 deletions .env-example
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ HTTP_RPC_NODE=
## When running with Docker, this will affect the host port binding, not the binding inside the container.
#METRICS_PORT=9100

# Let the sentinel instance periodically report basic metrics to a remote server.
# Set this to false in order to disable it.
#TELEMETRY=true

# Default telemetry server instance provided by Superfluid
#TELEMETRY_URL=https://sentinel-telemetry.x.superfluid.dev

# Reporting interval in seconds, defaults to 43200 (12 hours)
#TELEMETRY_INTERVAL=43200

## If set, you get notified about key events like process (re)starts, configuration changes and error conditions
## to the Slack channel the hook belongs to.
#SLACK_WEBHOOK_URL=
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ COPY ["package.json", "package-lock.json*", "./"]
RUN npm ci --only=production
COPY . /app

RUN mkdir data
# make sure we can write the data directory
RUN chown node:node data

# Add a simple init system so that Node would respect process signals
Expand Down
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ If all is well, you may want to set the service to autostart:
systemctl enable superfluid-sentinel.service
```

### Monitoring & Alerting
### Monitoring, Alerting & Telemetry

The sentinel can provide monitoring information. In the default configuration, this is available on port 9100 in JSON format.

Expand All @@ -98,6 +98,18 @@ In order to set up notifications, see `.env-example` for the relevant configurat

The notification system is modular. If you want support for more channels, consider adding it. See `src/services/slackNotifier.js` for a blueprint. PRs are welcome!

Sentinel instances also periodically (default: every 12 hours) report basic metrics to a telemetry endpoint.
This helps us understand how many instances are active and what their approximate configuration is.
Reported metrics:
* uuid (randomly generated on first start and preserved in a file "data/uuid.txt")
* chain id
* nodejs version
* sentinel version
* healthy flag (false e.g. if the configured RPC is drifting)
* number of RPC requests (since last restart)
* account balance (rounded to 3 decimal places)
* memory used by the process

#### Run multiple instances

In order to run sentinels for multiple networks in parallel, create network specific env files which are
Expand Down
Empty file added data/.gitkeep
Empty file.
71 changes: 39 additions & 32 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "superfluid-sentinel",
"version": "0.10.0",
"version": "0.11.0",
"description": "Superfluid Sentinel",
"main": "main.js",
"scripts": {
Expand Down Expand Up @@ -43,6 +43,7 @@
"prom-client": "^14.0.1",
"sequelize": "^6.12.5",
"sqlite3": "^5.0.2",
"uuid": "^9.0.1",
"web3": "1.6.1",
"winston": "^3.3.3"
},
Expand Down
9 changes: 9 additions & 0 deletions src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const Notifier = require("./services/notifier");
const SlackNotifier = require("./services/slackNotifier");
const TelegramNotifier = require("./services/telegramNotifier");
const NotifierJobs = require("./services/notificationJobs");
const Telemetry = require("./services/telemetry");
const Errors = require("./utils/errors/errors");
const { wad4human } = require("@decentral.ee/web3-helpers");

Expand Down Expand Up @@ -61,6 +62,7 @@ class App {

this.healthReport = new Report(this);
this.server = new HTTPServer(this);
this.telemetry = new Telemetry(this);
this.timer = new Timer();

this.notifier = new Notifier(this);
Expand Down Expand Up @@ -141,6 +143,8 @@ class App {
counter--;
}
}
this.logger.info(`app.shutdown() - clear interval`);
clearInterval(this._telemetryIntervalId);
this.logger.info(`app.shutdown() - closing database`);
await this.db.close();
} catch (err) {
Expand Down Expand Up @@ -233,6 +237,11 @@ class App {
if (this.config.METRICS === true) {
this.timer.startAfter(this.server);
}
// start reporting services with the configured interval.
if(this.config.TELEMETRY) {
this.logger.info(`Starting telemetry job with interval ${this.config.TELEMETRY_INTERVAL}`);
this._telemetryIntervalId = this.timer.triggerInterval(() => this.telemetry.start(), this.config.TELEMETRY_INTERVAL);
}
// Only start notification jobs if notifier is enabled
if (this.notificationJobs) {
this.logger.info(`Starting notification jobs`);
Expand Down
5 changes: 4 additions & 1 deletion src/config/configuration.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class Config {
this.SLACK_WEBHOOK_URL = process.env.SLACK_WEBHOOK_URL;
this.TELEGRAM_BOT_TOKEN = process.env.TELEGRAM_BOT_TOKEN;
this.TELEGRAM_CHAT_ID = process.env.TELEGRAM_CHAT_ID;
this.TELEMETRY = this._parseToBool(process.env.TELEMETRY, true);
this.TELEMETRY_URL = process.env.TELEMETRY_URL || "https://sentinel-telemetry.x.superfluid.dev";
this.TELEMETRY_INTERVAL = process.env.TELEMETRY_INTERVAL * 1000 || 43200000; // defaults to 12 hours

// extra options: undoc and excluded from cmdline parser. Use .env file to change the defaults.
this.CONCURRENCY = process.env.CONCURRENCY || 1;
Expand Down Expand Up @@ -192,7 +195,7 @@ class Config {
MAX_TX_NUMBER: this.MAX_TX_NUMBER,
SLACK_WEBHOOK_URL: this.SLACK_WEBHOOK_URL,
TELEGRAM_BOT_TOKEN: this.TELEGRAM_BOT_TOKEN,
TELEGRAM_CHAT_ID: this.TELEGRAM_CHAT_ID
TELEGRAM_CHAT_ID: this.TELEGRAM_CHAT_ID,
};
}
}
Expand Down
4 changes: 4 additions & 0 deletions src/config/loadCmdArgs.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ program
.option("--pic [value]", "PIC Address (default: not set)")
.option("--observer", "Set sentinel to observer (default: not set)")
.option("--no-fastsync", "Don't use fastsync feature (default: not set)")
.option("--no-telemetry", "Don't use telemetry feature (default: not set)")
.action(function (args) {
if (args.httpRpcNode !== undefined) {
process.env.HTTP_RPC_NODE = args.httpRpcNode;
Expand Down Expand Up @@ -72,5 +73,8 @@ program
if(args.fastsync === false) { // the prefix no- is treated differently
process.env.FASTSYNC = "false";
}
if(args.telemetry === false) { // the prefix no- is treated differently
process.env.TELEMETRY = "false";
}
});
program.parse(process.argv);
3 changes: 2 additions & 1 deletion src/httpserver/report.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Report {
this.app.logger.error(`report.fullReport() - web3.eth.isSyncing failed: ${err}`);
}
}

const rpcProvider = (new URL(this.app.config.HTTP_RPC_NODE)).hostname;
const databaseOk = await this.checkDatabase();
const estimationQueueSize = this.app.queues.getEstimationQueueLength();
const agreementQueueSize = this.app.queues.getAgreementQueueLength();
Expand All @@ -45,6 +45,7 @@ class Report {
network: {
chainId: await this.app.client.getChainId(),
rpc: {
rpcProvider: rpcProvider,
totalRequests: this.app.client.getTotalRequests(),
isSyncing: rpcIsSyncing,
lastTimeNewBlocks: lastTimeNewBlocks,
Expand Down
Loading

0 comments on commit 300434f

Please sign in to comment.