Skip to content

Commit

Permalink
Better health (#260)
Browse files Browse the repository at this point in the history
* Sentinel alias naming configuration

* remove console logs

* add RPC_STUCK_THRESHOLD and small refactor

* update notification jobs to send health reasons

* add instance name to report

* add RPC_STUCK_THRESHOLD as config obj

* add RPC_STUCK_THRESHOLD to test

* add default value

* fix test

* dumb change
  • Loading branch information
ngmachado authored Jan 17, 2024
1 parent c78d4ba commit bb9ca89
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 68 deletions.
2 changes: 2 additions & 0 deletions src/config/configuration.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class Config {
this.IPFS_GATEWAY = process.env.IPFS_GATEWAY || "https://cloudflare-ipfs.com/ipfs/"
this.PIRATE = this._parseToBool(config.pirate);
this.INSTANCE_NAME = config.INSTANCE_NAME || "Sentinel";
this.RPC_STUCK_THRESHOLD = config.rpc_stuck_threshold;
}

_initializeFromEnvVariables() {
Expand Down Expand Up @@ -112,6 +113,7 @@ class Config {
this.MAX_TX_NUMBER = process.env.MAX_TX_NUMBER || 100;
this.NO_REMOTE_MANIFEST = this._parseToBool(process.env.NO_REMOTE_MANIFEST, false);
this.INSTANCE_NAME = process.env.INSTANCE_NAME || "Sentinel";
this.RPC_STUCK_THRESHOLD = process.env.RPC_STUCK_THRESHOLD || (this.POLLING_INTERVAL * 4) / 1000;
}

_parseToBool(value, defaultValue = false) {
Expand Down
160 changes: 96 additions & 64 deletions src/httpserver/report.js
Original file line number Diff line number Diff line change
@@ -1,71 +1,103 @@
class Report {
constructor (app) {
this.app = app;
this._isSyncingMethodExist = true; //default we will try to call web3.eth.isSyncing.
}

async checkDatabase () {
try {
return (await this.app.db.sysQueries.healthCheck()) !== undefined;
} catch (err) {
this.app.logger.error(`Report.checkDatabase(): ${err}`);
return false;
constructor(app) {
this.app = app;
this._isSyncingMethodExist = true; //default we will try to call web3.eth.isSyncing.
}
}

async fullReport () {
let rpcIsSyncing = false;
// not available on all networks
if(this._isSyncingMethodExist) {
try {
rpcIsSyncing = await this.app.client.RPCClient.web3.eth.isSyncing();
} catch(err) {
this._isSyncingMethodExist = false;
this.app.logger.error(`report.fullReport() - web3.eth.isSyncing failed: ${err}`);
}

async checkDatabase() {
try {
const isHealthy = (await this.app.db.sysQueries.healthCheck()) !== undefined;
return {isHealthy, reason: ''};
} catch (err) {
this.app.logger.error(`Report.checkDatabase(): ${err}`);
return {isHealthy: false, reason: `Database check failed: ${err.message}`};
}
}
const rpcProvider = (new URL(this.app.config.HTTP_RPC_NODE)).hostname;
const databaseOk = await this.checkDatabase();
const estimationQueueSize = this.app.queues.getEstimationQueueLength();
const agreementQueueSize = this.app.queues.getAgreementQueueLength();
const lastTimeNewBlocks = this.app.eventTracker.lastTimeNewBlocks;
const waitingForNewBlocksSince = Math.floor(Math.abs(new Date() - lastTimeNewBlocks) / 1000);
const RPCStuck = waitingForNewBlocksSince * 1000 > this.app.config.POLLING_INTERVAL * 2;
const overallHealthy = rpcIsSyncing === false && databaseOk && !RPCStuck;
return {
timestamp: Date.now(),
healthy: overallHealthy,
process: {
uptime: Math.floor(process.uptime()),
pid: process.pid
},
network: {
chainId: await this.app.client.getChainId(),
rpc: {
rpcProvider: rpcProvider,
totalRequests: this.app.client.getTotalRequests(),
isSyncing: rpcIsSyncing,
lastTimeNewBlocks: lastTimeNewBlocks,
waitingForNewBlocksSince: waitingForNewBlocksSince,
msg: this._isSyncingMethodExist ? "" : "RPC doesn't implement web3.eth.isSyncing",

async checkRPCSyncing() {
if (!this._isSyncingMethodExist) {
return {isHealthy: true, reason: 'RPC does not implement web3.eth.isSyncing'};
}
},
account: {
address: this.app.client.getAccountAddress(),
balance: await this.app.client.getAccountBalance()
},
queues: {
agreementQueue: agreementQueueSize,
estimationQueue: estimationQueueSize
},
protocol: {
cfa: this.app.client.contracts.getCFAv1Address(),
ida: this.app.client.contracts.getIDAv1Address(),
gda: this.app.client.contracts.getGDAv1Address(),
supertokens: Object.values(this.app.client.superToken.superTokenNames)
}
};
}

try {
const isSyncing = await this.app.client.RPCClient.web3.eth.isSyncing();
return {isHealthy: !isSyncing, reason: isSyncing ? 'RPC is syncing' : ''};
} catch (err) {
this._isSyncingMethodExist = false;
this.app.logger.error('Report.checkRPCSyncing()', err);
return {isHealthy: false, reason: `RPC syncing check failed: ${err.message}`};
}
}

async checkRPCStuck() {

const waitingForNewBlocksSince = this.awaitingForNewBlocksSince();
const rpcStuckThreshold = this.app.config.RPC_STUCK_THRESHOLD;
const isStuck = waitingForNewBlocksSince > rpcStuckThreshold;
const reason = isStuck ? `RPC is stuck. No new blocks for ${waitingForNewBlocksSince} s` : '';
return {isHealthy: !isStuck, reason};
}

awaitingForNewBlocksSince() {
const currentTime = Date.now();
const lastTimeNewBlocks = this.app.eventTracker.lastTimeNewBlocks.getTime();
return Math.floor(Math.abs(currentTime - lastTimeNewBlocks) / 1000);
}


async fullReport() {

const healthDiagnostics = {
database: await this.checkDatabase(),
rpcSyncing: await this.checkRPCSyncing(),
rpcStuck: await this.checkRPCStuck()
};

const overallHealthy = Object.values(healthDiagnostics).every(check => check.isHealthy);
const reasons = Object.entries(healthDiagnostics)
.filter(([_, check]) => !check.isHealthy)
.map(([key, check]) => `${key}: ${check.reason}`);

return {
timestamp: Date.now(),
healthy: overallHealthy,
reasons: reasons,

process: {
uptime: Math.floor(process.uptime()),
pid: process.pid
},

network: {
chainId: await this.app.client.getChainId(),
rpc: {
rpcProvider: (new URL(this.app.config.HTTP_RPC_NODE)).hostname,
totalRequests: this.app.client.getTotalRequests(),
isSyncing: healthDiagnostics.rpcSyncing.isHealthy,
lastTimeNewBlocks: this.app.eventTracker.lastTimeNewBlocks,
waitingForNewBlocksSince: this.awaitingForNewBlocksSince(),
msg: this._isSyncingMethodExist ? "" : "RPC doesn't implement web3.eth.isSyncing",
}
},

account: {
address: this.app.client.getAccountAddress(),
balance: (await this.app.client.getAccountBalance()).toString(),
},

queues: {
agreementQueue: this.app.queues.getAgreementQueueLength(),
estimationQueue: this.app.queues.getEstimationQueueLength()
},

protocol: {
cfa: this.app.client.contracts.getCFAv1Address(),
ida: this.app.client.contracts.getIDAv1Address(),
gda: this.app.client.contracts.getGDAv1Address(),
supertokens: Object.values(this.app.client.superToken.superTokenNames)
}
};
}
}

module.exports = Report;
2 changes: 1 addition & 1 deletion src/services/notificationJobs.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class NotificationJobs {
async sendReport () {
const healthcheck = await this.app.healthReport.fullReport();
if(!healthcheck.healthy) {
const healthData = `Healthy: ${healthcheck.healthy}\nChainId: ${healthcheck.network.chainId}`;
const healthData = `Instance Name: ${this.app.config.INSTANCE_NAME}\nHealthy: ${healthcheck.healthy}\nChainId: ${healthcheck.network.chainId}\nReasons: ${healthcheck.reasons.join('\n')}`;
this.app.notifier.sendNotification(healthData);
}
const currentTime = Date.now();
Expand Down
1 change: 1 addition & 0 deletions test/integration/ida.integration.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ describe("IDA integration tests", () => {
await app.shutdown();
protocolHelper.expectLiquidationV2(result[0], AGENT_ACCOUNT, accounts[0], "0");
} catch (err) {

protocolHelper.exitWithError(err);
}
});
Expand Down
2 changes: 1 addition & 1 deletion test/integration/node.integration.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ describe("Agent configurations tests", () => {
try {
await helper.operations.createStream(helper.sf.superToken.options.address, accounts[0], accounts[2], "100000000000");
await ganache.helper.timeTravelOnce(provider, web3, 1);
await bootNode({resolver: helper.sf.resolver.options.address});
await bootNode({resolver: helper.sf.resolver.options.address, rpc_stuck_threshold: 10});
let healthy;
while (true) {
await protocolHelper.timeout(9000);
Expand Down
5 changes: 4 additions & 1 deletion test/unit-tests/services/notificationJobs.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ describe("NotificationJobs", () => {
sandbox = sinon.createSandbox();
appMock = {
healthReport: {
fullReport: sinon.stub().resolves({ healthy: false, network: { chainId: 1 } }),
fullReport: sinon.stub().resolves({ healthy: false, network: { chainId: 1 }, reasons: ["test"] }),
},
notifier: {
sendNotification: sinon.stub(),
Expand All @@ -22,6 +22,9 @@ describe("NotificationJobs", () => {
logger: {
info: sinon.stub(),
},
config: {
INSTANCE_NAME: "test",
},
_isShutdown: false,
};

Expand Down
3 changes: 2 additions & 1 deletion test/utils/protocolHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ function getSentinelConfig(config) {
additional_liquidation_delay: 0,
block_offset: 1,
liquidation_job_awaits: 5000,
fastsync: "false"
fastsync: "false",
rpc_stuck_threshold: 100,
};

return { ...myBaseConfig, ...config };
Expand Down

0 comments on commit bb9ca89

Please sign in to comment.