From 1e90a4055144e48ec007b6237b6a4fecba2748fb Mon Sep 17 00:00:00 2001 From: David Luna Date: Fri, 12 Jan 2024 08:34:40 +0100 Subject: [PATCH] fix(host-metrics)!: fix process.cpu.* metrics (#1785) --- .../src/BaseMetrics.ts | 2 +- .../opentelemetry-host-metrics/src/enum.ts | 10 +- .../opentelemetry-host-metrics/src/metric.ts | 51 ++++---- .../src/stats/common.ts | 43 +++++-- .../test/metric.test.ts | 111 +++++++++++------- .../test/mocks/process.json | 10 ++ 6 files changed, 150 insertions(+), 77 deletions(-) create mode 100644 packages/opentelemetry-host-metrics/test/mocks/process.json diff --git a/packages/opentelemetry-host-metrics/src/BaseMetrics.ts b/packages/opentelemetry-host-metrics/src/BaseMetrics.ts index 6d36f169d4..fa9453ff22 100644 --- a/packages/opentelemetry-host-metrics/src/BaseMetrics.ts +++ b/packages/opentelemetry-host-metrics/src/BaseMetrics.ts @@ -46,7 +46,7 @@ export abstract class BaseMetrics { constructor(config: MetricsCollectorConfig) { this._name = config.name || DEFAULT_NAME; const meterProvider = - config.meterProvider! || api.metrics.getMeterProvider(); + config.meterProvider || api.metrics.getMeterProvider(); if (!config.meterProvider) { this._logger.warn('No meter provider, using default'); } diff --git a/packages/opentelemetry-host-metrics/src/enum.ts b/packages/opentelemetry-host-metrics/src/enum.ts index c23bc57587..acfe4adc05 100644 --- a/packages/opentelemetry-host-metrics/src/enum.ts +++ b/packages/opentelemetry-host-metrics/src/enum.ts @@ -27,9 +27,16 @@ export enum METRIC_NAMES { PROCESS_MEMORY_USAGE = 'process.memory.usage', } -export enum METRIC_ATTRIBUTES { +export enum ATTRIBUTE_NAMES { SYSTEM_CPU_LOGICAL_NUMBER = 'system.cpu.logical_number', SYSTEM_CPU_STATE = 'system.cpu.state', + SYSTEM_MEMORY_STATE = 'system.memory.state', + SYSTEM_DEVICE = 'system.device', + SYSTEM_NETWORK_DIRECTION = 'system.network.direction', + SYSTEM_NETWORK_STATE = 'system.network.state', + // TODO: change value if semconv changes + // https://github.com/open-telemetry/opentelemetry-specification/issues/3776 + PROCESS_CPU_STATE = 'state', } export enum CPU_LABELS { @@ -41,7 +48,6 @@ export enum CPU_LABELS { } export enum NETWORK_LABELS { - DEVICE = 'device', RECEIVE = 'receive', TRANSMIT = 'transmit', } diff --git a/packages/opentelemetry-host-metrics/src/metric.ts b/packages/opentelemetry-host-metrics/src/metric.ts index 022ba8d051..2604e95d00 100644 --- a/packages/opentelemetry-host-metrics/src/metric.ts +++ b/packages/opentelemetry-host-metrics/src/metric.ts @@ -40,8 +40,8 @@ export class HostMetrics extends BaseMetrics { observableResult: api.BatchObservableResult, cpuUsages: CpuUsageData[] ): void { - const stateAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_STATE; - const cpuAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER; + const stateAttr = enums.ATTRIBUTE_NAMES.SYSTEM_CPU_STATE; + const cpuAttr = enums.ATTRIBUTE_NAMES.SYSTEM_CPU_LOGICAL_NUMBER; for (let i = 0, j = cpuUsages.length; i < j; i++) { const cpuUsage = cpuUsages[i]; @@ -93,25 +93,27 @@ export class HostMetrics extends BaseMetrics { observableResult: api.BatchObservableResult, processCpuUsage: ProcessCpuUsageData ): void { + const stateAttr = enums.ATTRIBUTE_NAMES.PROCESS_CPU_STATE; + observableResult.observe(this._processCpuTime, processCpuUsage.user, { - state: enums.CPU_LABELS.USER, + [stateAttr]: enums.CPU_LABELS.USER, }); observableResult.observe(this._processCpuTime, processCpuUsage.system, { - state: enums.CPU_LABELS.SYSTEM, + [stateAttr]: enums.CPU_LABELS.SYSTEM, }); observableResult.observe( this._processCpuUtilization, processCpuUsage.userP, { - state: enums.CPU_LABELS.USER, + [stateAttr]: enums.CPU_LABELS.USER, } ); observableResult.observe( this._processCpuUtilization, processCpuUsage.systemP, { - state: enums.CPU_LABELS.SYSTEM, + [stateAttr]: enums.CPU_LABELS.SYSTEM, } ); } @@ -120,18 +122,20 @@ export class HostMetrics extends BaseMetrics { observableResult: api.BatchObservableResult, memUsage: MemoryData ): void { + const stateAttr = enums.ATTRIBUTE_NAMES.SYSTEM_MEMORY_STATE; + observableResult.observe(this._memoryUsage, memUsage.used, { - state: enums.MEMORY_LABELS.USED, + [stateAttr]: enums.MEMORY_LABELS.USED, }); observableResult.observe(this._memoryUsage, memUsage.free, { - state: enums.MEMORY_LABELS.FREE, + [stateAttr]: enums.MEMORY_LABELS.FREE, }); observableResult.observe(this._memoryUtilization, memUsage.usedP, { - state: enums.MEMORY_LABELS.USED, + [stateAttr]: enums.MEMORY_LABELS.USED, }); observableResult.observe(this._memoryUtilization, memUsage.freeP, { - state: enums.MEMORY_LABELS.FREE, + [stateAttr]: enums.MEMORY_LABELS.FREE, }); } @@ -146,33 +150,36 @@ export class HostMetrics extends BaseMetrics { observableResult: api.BatchObservableResult, networkUsages: NetworkData[] ): void { + const deviceAttr = enums.ATTRIBUTE_NAMES.SYSTEM_DEVICE; + const directionAttr = enums.ATTRIBUTE_NAMES.SYSTEM_NETWORK_DIRECTION; + for (let i = 0, j = networkUsages.length; i < j; i++) { const networkUsage = networkUsages[i]; observableResult.observe(this._networkDropped, networkUsage.rx_dropped, { - [enums.NETWORK_LABELS.DEVICE]: networkUsage.iface, - direction: enums.NETWORK_LABELS.RECEIVE, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.RECEIVE, }); observableResult.observe(this._networkDropped, networkUsage.tx_dropped, { - device: networkUsage.iface, - direction: enums.NETWORK_LABELS.TRANSMIT, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.TRANSMIT, }); observableResult.observe(this._networkErrors, networkUsage.rx_errors, { - device: networkUsage.iface, - direction: enums.NETWORK_LABELS.RECEIVE, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.RECEIVE, }); observableResult.observe(this._networkErrors, networkUsage.tx_errors, { - device: networkUsage.iface, - direction: enums.NETWORK_LABELS.TRANSMIT, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.TRANSMIT, }); observableResult.observe(this._networkIo, networkUsage.rx_bytes, { - device: networkUsage.iface, - direction: enums.NETWORK_LABELS.RECEIVE, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.RECEIVE, }); observableResult.observe(this._networkIo, networkUsage.tx_bytes, { - device: networkUsage.iface, - direction: enums.NETWORK_LABELS.TRANSMIT, + [deviceAttr]: networkUsage.iface, + [directionAttr]: enums.NETWORK_LABELS.TRANSMIT, }); } } diff --git a/packages/opentelemetry-host-metrics/src/stats/common.ts b/packages/opentelemetry-host-metrics/src/stats/common.ts index 520dc2cd2b..6803905fb7 100644 --- a/packages/opentelemetry-host-metrics/src/stats/common.ts +++ b/packages/opentelemetry-host-metrics/src/stats/common.ts @@ -19,7 +19,7 @@ import * as os from 'os'; import { CpuUsageData, MemoryData, ProcessCpuUsageData } from '../types'; const MILLISECOND = 1 / 1e3; -let cpuUsageTime: number | undefined = undefined; +const MICROSECOND = 1 / 1e6; /** * We get data as soon as we load the module so the 1st collect @@ -77,19 +77,38 @@ export function getCpuUsageData(): CpuUsageData[] { } /** - * It returns process cpu load delta from last time - to be used with SumObservers. - * When called first time it will return 0 and then delta will be calculated + * We get data as soon as we load the module so the 1st collect + * of the metric already has valuable data to be sent. + */ +let prevProcData: { time: number; usage: NodeJS.CpuUsage } = { + time: Date.now(), + usage: process.cpuUsage(), +}; + +/** + * Gets the process CPU usage and returns + * - the time spent in `user` state + * - the time spent in `system` state + * - the % of time in `user` state since last measurement + * - the % of time in `system` state since last measurement */ export function getProcessCpuUsageData(): ProcessCpuUsageData { - if (typeof cpuUsageTime !== 'number') { - cpuUsageTime = new Date().getTime() - process.uptime() * 1000; - } - const timeElapsed = (new Date().getTime() - cpuUsageTime) / 1000; - const cpuUsage: NodeJS.CpuUsage = process.cpuUsage(); - const user = cpuUsage.user * MILLISECOND; - const system = cpuUsage.system * MILLISECOND; - const userP = user / timeElapsed; - const systemP = system / timeElapsed; + const currentTime = Date.now(); + const currentUsage = process.cpuUsage(); + const prevUsage = prevProcData.usage; + // According to semantic conventions we need to divide by + // - time elapsed (in microseconds to match `process.cpuUsage()` units) + // - number of CPUs + const timeElapsed = (currentTime - prevProcData.time) * 1000; + const cpusTimeElapsed = timeElapsed * prevOsData.cpus.length; + + const user = currentUsage.user * MICROSECOND; + const system = currentUsage.system * MICROSECOND; + const userP = (currentUsage.user - prevUsage.user) / cpusTimeElapsed; + const systemP = (currentUsage.system - prevUsage.system) / cpusTimeElapsed; + + prevProcData = { time: currentTime, usage: currentUsage }; + return { user, system, diff --git a/packages/opentelemetry-host-metrics/test/metric.test.ts b/packages/opentelemetry-host-metrics/test/metric.test.ts index 64e5cf3d90..196e74e702 100644 --- a/packages/opentelemetry-host-metrics/test/metric.test.ts +++ b/packages/opentelemetry-host-metrics/test/metric.test.ts @@ -27,10 +27,11 @@ import { import * as assert from 'assert'; import * as os from 'os'; import * as sinon from 'sinon'; -import { METRIC_ATTRIBUTES } from '../src/enum'; +import { ATTRIBUTE_NAMES } from '../src/enum'; import { HostMetrics } from '../src'; const cpuJson = require('./mocks/cpu.json'); +const processJson = require('./mocks/process.json'); const networkJson = require('./mocks/network.json'); class TestMetricReader extends MetricReader { @@ -75,7 +76,20 @@ const mockedOS = { }, }; -const INTERVAL = 3000; +const mockedProcess = { + uptime: function () { + return 0; + }, + procIdx: 0, + cpuUsage: function () { + return processJson[this.procIdx++ % 2]; + }, + memoryUsage: { + rss: function () { + return 123456; + }, + }, +}; describe('Host Metrics', () => { let meterProvider: MeterProvider; @@ -113,24 +127,17 @@ describe('Host Metrics', () => { sandbox = sinon.createSandbox(); sandbox.useFakeTimers(); - sandbox.stub(os, 'freemem').callsFake(() => { - return mockedOS.freemem(); - }); - sandbox.stub(os, 'totalmem').returns(mockedOS.totalmem()); + sandbox.stub(os, 'freemem').callsFake(mockedOS.freemem); + sandbox.stub(os, 'totalmem').callsFake(mockedOS.totalmem); sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus()); - sandbox.stub(process, 'uptime').returns(0); - sandbox.stub(SI, 'networkStats').callsFake(() => { - return mockedSI.networkStats(); - }); - sandbox.stub(process, 'cpuUsage').callsFake(() => { - return { - user: 90713560, - system: 63192630, - }; - }); - sandbox.stub(process.memoryUsage, 'rss').callsFake(() => { - return 123456; - }); + sandbox.stub(process, 'uptime').callsFake(mockedProcess.uptime); + sandbox + .stub(process, 'cpuUsage') + .callsFake(() => mockedProcess.cpuUsage()); + sandbox + .stub(process.memoryUsage, 'rss') + .callsFake(mockedProcess.memoryUsage.rss); + sandbox.stub(SI, 'networkStats').callsFake(mockedSI.networkStats); reader = new TestMetricReader(); @@ -144,13 +151,9 @@ describe('Host Metrics', () => { await hostMetrics.start(); - const dateStub = sandbox - .stub(Date.prototype, 'getTime') - .returns(process.uptime() * 1000 + 1); // Drop first frame cpu metrics, see - // src/common.ts getCpuUsageData + // src/common.ts getCpuUsageData/getProcessCpuUsageData await reader.collect(); - dateStub.returns(process.uptime() * 1000 + INTERVAL); // advance the clock for the next collection sandbox.clock.tick(1000); @@ -162,8 +165,12 @@ describe('Host Metrics', () => { sandbox.restore(); }); - const sysCpuStateAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_STATE; - const sysCpuNumAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER; + const sysCpuStateAttr = ATTRIBUTE_NAMES.SYSTEM_CPU_STATE; + const sysCpuNumAttr = ATTRIBUTE_NAMES.SYSTEM_CPU_LOGICAL_NUMBER; + const sysMemStateAttr = ATTRIBUTE_NAMES.SYSTEM_MEMORY_STATE; + const sysDeviceAttr = ATTRIBUTE_NAMES.SYSTEM_DEVICE; + const sysNetDirAttr = ATTRIBUTE_NAMES.SYSTEM_NETWORK_DIRECTION; + const procCpuStateAttr = ATTRIBUTE_NAMES.PROCESS_CPU_STATE; it('should export CPU time metrics', async () => { const metric = await getRecords(reader, 'system.cpu.time'); @@ -280,50 +287,74 @@ describe('Host Metrics', () => { it('should export Memory usage metrics', async () => { const metric = await getRecords(reader, 'system.memory.usage'); - ensureValue(metric, { state: 'used' }, 1024 * 1024 - 1024); - ensureValue(metric, { state: 'free' }, 1024); + ensureValue(metric, { [sysMemStateAttr]: 'used' }, 1024 * 1024 - 1024); + ensureValue(metric, { [sysMemStateAttr]: 'free' }, 1024); }); it('should export Memory utilization metrics', async () => { const metric = await getRecords(reader, 'system.memory.utilization'); - ensureValue(metric, { state: 'used' }, 0.9990234375); - ensureValue(metric, { state: 'free' }, 0.0009765625); + ensureValue(metric, { [sysMemStateAttr]: 'used' }, 0.9990234375); + ensureValue(metric, { [sysMemStateAttr]: 'free' }, 0.0009765625); }); it('should export Network io dropped', async () => { const metric = await getRecords(reader, 'system.network.dropped'); - ensureValue(metric, { direction: 'receive', device: 'eth0' }, 1200); - ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 12); + ensureValue( + metric, + { [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' }, + 1200 + ); + ensureValue( + metric, + { [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' }, + 12 + ); }); it('should export Network io errors', async () => { const metric = await getRecords(reader, 'system.network.errors'); - ensureValue(metric, { direction: 'receive', device: 'eth0' }, 3); - ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 15); + ensureValue( + metric, + { [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' }, + 3 + ); + ensureValue( + metric, + { [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' }, + 15 + ); }); it('should export Network io bytes', async () => { const metric = await getRecords(reader, 'system.network.io'); - ensureValue(metric, { direction: 'receive', device: 'eth0' }, 123123); - ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 321321); + ensureValue( + metric, + { [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' }, + 123123 + ); + ensureValue( + metric, + { [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' }, + 321321 + ); }); it('should export Process CPU time metrics', async () => { const metric = await getRecords(reader, 'process.cpu.time'); - ensureValue(metric, { state: 'user' }, 90713.56); - ensureValue(metric, { state: 'system' }, 63192.630000000005); + ensureValue(metric, { [procCpuStateAttr]: 'user' }, 90.71356); + ensureValue(metric, { [procCpuStateAttr]: 'system' }, 63.192629999999994); }); it('should export Process CPU utilization metrics', async () => { const metric = await getRecords(reader, 'process.cpu.utilization'); - ensureValue(metric, { state: 'user' }, 30247.935978659552); - ensureValue(metric, { state: 'system' }, 21071.23374458153); + ensureValue(metric, { [procCpuStateAttr]: 'user' }, 0.025); + ensureValue(metric, { [procCpuStateAttr]: 'system' }, 0.05); }); it('should export Process Memory usage metrics', async () => { diff --git a/packages/opentelemetry-host-metrics/test/mocks/process.json b/packages/opentelemetry-host-metrics/test/mocks/process.json new file mode 100644 index 0000000000..11ba6c2f57 --- /dev/null +++ b/packages/opentelemetry-host-metrics/test/mocks/process.json @@ -0,0 +1,10 @@ +[ + { + "user": 90663560, + "system": 63092630 + }, + { + "user": 90713560, + "system": 63192630 + } +] \ No newline at end of file