/**
 * Builds the health report for the running instance.
 *
 * Each individual check resolves to an object of the form
 * `{ isHealthy: boolean, reason: string }`, where `reason` is empty when the
 * check passes. `fullReport()` aggregates the checks and adds process,
 * network, account, queue and protocol details read from `app`.
 */
class Report {
    /**
     * @param {object} app - Application container. This class reads
     *   `app.db`, `app.client`, `app.config`, `app.eventTracker`,
     *   `app.queues` and `app.logger` from it.
     */
    constructor(app) {
        this.app = app;
        // By default we try to call web3.eth.isSyncing; cleared after the first failed call.
        this._isSyncingMethodExist = true;
    }

    /**
     * Runs the database health query.
     *
     * @returns {Promise<{isHealthy: boolean, reason: string}>} Healthy when
     *   `healthCheck()` resolves to anything other than `undefined`. Never rejects:
     *   a thrown error is logged and reported as an unhealthy result.
     */
    async checkDatabase() {
        try {
            const isHealthy = (await this.app.db.sysQueries.healthCheck()) !== undefined;
            return {
                isHealthy,
                // An unhealthy result always carries a reason so `reasons` never shows a blank entry.
                reason: isHealthy ? "" : "Database check failed: health check returned no result",
            };
        } catch (err) {
            this.app.logger.error(`Report.checkDatabase(): ${err}`);
            return { isHealthy: false, reason: `Database check failed: ${err.message}` };
        }
    }

    /**
     * Asks the RPC node whether it is still syncing.
     *
     * Not every network implements `web3.eth.isSyncing`. After the first failed
     * call the probe is disabled and later calls report healthy with an
     * explanatory reason.
     *
     * @returns {Promise<{isHealthy: boolean, isSyncing: (object|boolean), reason: string}>}
     *   `isSyncing` is the value returned by the node (`false` when it is not
     *   syncing, when the probe is disabled, or when the call failed).
     */
    async checkRPCSyncing() {
        if (!this._isSyncingMethodExist) {
            return {
                isHealthy: true,
                isSyncing: false,
                reason: "RPC does not implement web3.eth.isSyncing",
            };
        }

        try {
            // Any falsy answer is normalised to `false` so the report field has a stable type.
            const isSyncing = (await this.app.client.RPCClient.web3.eth.isSyncing()) || false;
            const syncing = isSyncing !== false;
            return { isHealthy: !syncing, isSyncing, reason: syncing ? "RPC is syncing" : "" };
        } catch (err) {
            this._isSyncingMethodExist = false;
            this.app.logger.error(`Report.checkRPCSyncing(): ${err}`);
            return {
                isHealthy: false,
                isSyncing: false,
                reason: `RPC syncing check failed: ${err.message}`,
            };
        }
    }

    /**
     * Flags the RPC as stuck when no new blocks were seen for longer than the
     * configured threshold (in seconds).
     *
     * @returns {Promise<{isHealthy: boolean, waitingForNewBlocksSince: number, reason: string}>}
     *   `waitingForNewBlocksSince` is the number of seconds used for the decision.
     */
    async checkRPCStuck() {
        const waitingForNewBlocksSince = this.awaitingForNewBlocksSince();
        const isStuck = waitingForNewBlocksSince > this._getRPCStuckThreshold();
        const reason = isStuck ? `RPC is stuck. No new blocks for ${waitingForNewBlocksSince} s` : "";
        return { isHealthy: !isStuck, waitingForNewBlocksSince, reason };
    }

    /**
     * Resolves the stuck threshold in seconds.
     *
     * `config.RPC_STUCK_THRESHOLD` may be a number, a numeric string (when it
     * comes from an environment variable) or unset. Comparing against an unset
     * value would never report "stuck", so unset or non-numeric values fall back
     * to four polling intervals.
     *
     * @returns {number} Threshold in seconds (NaN only if POLLING_INTERVAL is unusable too).
     */
    _getRPCStuckThreshold() {
        const configured = this.app.config.RPC_STUCK_THRESHOLD;
        const isSet = configured !== undefined && configured !== null && String(configured).trim() !== "";
        if (isSet && Number.isFinite(Number(configured))) {
            return Number(configured);
        }
        // POLLING_INTERVAL is in milliseconds; the threshold is in seconds.
        return (this.app.config.POLLING_INTERVAL * 4) / 1000;
    }

    /**
     * @returns {number} Whole seconds elapsed since `eventTracker.lastTimeNewBlocks`.
     *   A non-Date value is coerced with `Number()` instead of throwing, so an
     *   unset timestamp yields NaN (which never counts as stuck).
     */
    awaitingForNewBlocksSince() {
        const lastTimeNewBlocks = this.app.eventTracker.lastTimeNewBlocks;
        const lastSeenMs = lastTimeNewBlocks instanceof Date
            ? lastTimeNewBlocks.getTime()
            : Number(lastTimeNewBlocks);
        return Math.floor(Math.abs(Date.now() - lastSeenMs) / 1000);
    }

    /**
     * Runs every health check and assembles the full report.
     *
     * @returns {Promise<object>} Report with `timestamp`, `healthy`, `reasons`
     *   (one `"<check>: <reason>"` entry per failed check) and the `process`,
     *   `network`, `account`, `queues` and `protocol` sections.
     *   Rejects if the chain id, account balance or RPC URL cannot be read.
     */
    async fullReport() {
        // The checks are independent of each other and never reject on their own I/O errors.
        const [database, rpcSyncing, rpcStuck] = await Promise.all([
            this.checkDatabase(),
            this.checkRPCSyncing(),
            this.checkRPCStuck(),
        ]);
        const healthDiagnostics = { database, rpcSyncing, rpcStuck };
        const failedChecks = Object.entries(healthDiagnostics).filter(([, check]) => !check.isHealthy);

        const chainId = await this.app.client.getChainId();
        const balance = await this.app.client.getAccountBalance();

        return {
            timestamp: Date.now(),
            healthy: failedChecks.length === 0,
            reasons: failedChecks.map(([key, check]) => `${key}: ${check.reason}`),

            process: {
                uptime: Math.floor(process.uptime()),
                pid: process.pid,
            },

            network: {
                chainId,
                rpc: {
                    rpcProvider: (new URL(this.app.config.HTTP_RPC_NODE)).hostname,
                    totalRequests: this.app.client.getTotalRequests(),
                    // Report the node's sync state itself, not the health flag derived from it.
                    isSyncing: rpcSyncing.isSyncing,
                    lastTimeNewBlocks: this.app.eventTracker.lastTimeNewBlocks,
                    // Same value the stuck check used, so it matches the text in `reasons`.
                    waitingForNewBlocksSince: rpcStuck.waitingForNewBlocksSince,
                    msg: this._isSyncingMethodExist ? "" : "RPC doesn't implement web3.eth.isSyncing",
                },
            },

            account: {
                address: this.app.client.getAccountAddress(),
                balance: balance.toString(),
            },

            queues: {
                agreementQueue: this.app.queues.getAgreementQueueLength(),
                estimationQueue: this.app.queues.getEstimationQueueLength(),
            },

            protocol: {
                cfa: this.app.client.contracts.getCFAv1Address(),
                ida: this.app.client.contracts.getIDAv1Address(),
                gda: this.app.client.contracts.getGDAv1Address(),
                supertokens: Object.values(this.app.client.superToken.superTokenNames),
            },
        };
    }
}

// CommonJS export; the guard only matters where `module` is not defined (ES-module evaluation).
if (typeof module !== "undefined") {
    module.exports = Report;
}
b/test/integration/node.integration.test.js @@ -84,7 +84,7 @@ describe("Agent configurations tests", () => { try { await helper.operations.createStream(helper.sf.superToken.options.address, accounts[0], accounts[2], "100000000000"); await ganache.helper.timeTravelOnce(provider, web3, 1); - await bootNode({resolver: helper.sf.resolver.options.address}); + await bootNode({resolver: helper.sf.resolver.options.address, rpc_stuck_threshold: 10}); let healthy; while (true) { await protocolHelper.timeout(9000); diff --git a/test/unit-tests/services/notificationJobs.test.js b/test/unit-tests/services/notificationJobs.test.js index 6777e805..486f155c 100644 --- a/test/unit-tests/services/notificationJobs.test.js +++ b/test/unit-tests/services/notificationJobs.test.js @@ -11,7 +11,7 @@ describe("NotificationJobs", () => { sandbox = sinon.createSandbox(); appMock = { healthReport: { - fullReport: sinon.stub().resolves({ healthy: false, network: { chainId: 1 } }), + fullReport: sinon.stub().resolves({ healthy: false, network: { chainId: 1 }, reasons: ["test"] }), }, notifier: { sendNotification: sinon.stub(), @@ -22,6 +22,9 @@ describe("NotificationJobs", () => { logger: { info: sinon.stub(), }, + config: { + INSTANCE_NAME: "test", + }, _isShutdown: false, }; diff --git a/test/utils/protocolHelper.js b/test/utils/protocolHelper.js index 700f113a..b183d053 100644 --- a/test/utils/protocolHelper.js +++ b/test/utils/protocolHelper.js @@ -206,7 +206,8 @@ function getSentinelConfig(config) { additional_liquidation_delay: 0, block_offset: 1, liquidation_job_awaits: 5000, - fastsync: "false" + fastsync: "false", + rpc_stuck_threshold: 100, }; return { ...myBaseConfig, ...config };