Skip to content

Commit

Permalink
fix(host-metrics)!: fix process.cpu.* metrics (#1785)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-luna authored Jan 12, 2024
1 parent 33b31d0 commit 1e90a40
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 77 deletions.
2 changes: 1 addition & 1 deletion packages/opentelemetry-host-metrics/src/BaseMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export abstract class BaseMetrics {
constructor(config: MetricsCollectorConfig) {
this._name = config.name || DEFAULT_NAME;
const meterProvider =
config.meterProvider! || api.metrics.getMeterProvider();
config.meterProvider || api.metrics.getMeterProvider();
if (!config.meterProvider) {
this._logger.warn('No meter provider, using default');
}
Expand Down
10 changes: 8 additions & 2 deletions packages/opentelemetry-host-metrics/src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,16 @@ export enum METRIC_NAMES {
PROCESS_MEMORY_USAGE = 'process.memory.usage',
}

export enum METRIC_ATTRIBUTES {
export enum ATTRIBUTE_NAMES {
SYSTEM_CPU_LOGICAL_NUMBER = 'system.cpu.logical_number',
SYSTEM_CPU_STATE = 'system.cpu.state',
SYSTEM_MEMORY_STATE = 'system.memory.state',
SYSTEM_DEVICE = 'system.device',
SYSTEM_NETWORK_DIRECTION = 'system.network.direction',
SYSTEM_NETWORK_STATE = 'system.network.state',
// TODO: change value if semconv changes
// https://github.com/open-telemetry/opentelemetry-specification/issues/3776
PROCESS_CPU_STATE = 'state',
}

export enum CPU_LABELS {
Expand All @@ -41,7 +48,6 @@ export enum CPU_LABELS {
}

export enum NETWORK_LABELS {
DEVICE = 'device',
RECEIVE = 'receive',
TRANSMIT = 'transmit',
}
Expand Down
51 changes: 29 additions & 22 deletions packages/opentelemetry-host-metrics/src/metric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ export class HostMetrics extends BaseMetrics {
observableResult: api.BatchObservableResult,
cpuUsages: CpuUsageData[]
): void {
const stateAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_STATE;
const cpuAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER;
const stateAttr = enums.ATTRIBUTE_NAMES.SYSTEM_CPU_STATE;
const cpuAttr = enums.ATTRIBUTE_NAMES.SYSTEM_CPU_LOGICAL_NUMBER;

for (let i = 0, j = cpuUsages.length; i < j; i++) {
const cpuUsage = cpuUsages[i];
Expand Down Expand Up @@ -93,25 +93,27 @@ export class HostMetrics extends BaseMetrics {
observableResult: api.BatchObservableResult,
processCpuUsage: ProcessCpuUsageData
): void {
const stateAttr = enums.ATTRIBUTE_NAMES.PROCESS_CPU_STATE;

observableResult.observe(this._processCpuTime, processCpuUsage.user, {
state: enums.CPU_LABELS.USER,
[stateAttr]: enums.CPU_LABELS.USER,
});
observableResult.observe(this._processCpuTime, processCpuUsage.system, {
state: enums.CPU_LABELS.SYSTEM,
[stateAttr]: enums.CPU_LABELS.SYSTEM,
});

observableResult.observe(
this._processCpuUtilization,
processCpuUsage.userP,
{
state: enums.CPU_LABELS.USER,
[stateAttr]: enums.CPU_LABELS.USER,
}
);
observableResult.observe(
this._processCpuUtilization,
processCpuUsage.systemP,
{
state: enums.CPU_LABELS.SYSTEM,
[stateAttr]: enums.CPU_LABELS.SYSTEM,
}
);
}
Expand All @@ -120,18 +122,20 @@ export class HostMetrics extends BaseMetrics {
observableResult: api.BatchObservableResult,
memUsage: MemoryData
): void {
const stateAttr = enums.ATTRIBUTE_NAMES.SYSTEM_MEMORY_STATE;

observableResult.observe(this._memoryUsage, memUsage.used, {
state: enums.MEMORY_LABELS.USED,
[stateAttr]: enums.MEMORY_LABELS.USED,
});
observableResult.observe(this._memoryUsage, memUsage.free, {
state: enums.MEMORY_LABELS.FREE,
[stateAttr]: enums.MEMORY_LABELS.FREE,
});

observableResult.observe(this._memoryUtilization, memUsage.usedP, {
state: enums.MEMORY_LABELS.USED,
[stateAttr]: enums.MEMORY_LABELS.USED,
});
observableResult.observe(this._memoryUtilization, memUsage.freeP, {
state: enums.MEMORY_LABELS.FREE,
[stateAttr]: enums.MEMORY_LABELS.FREE,
});
}

Expand All @@ -146,33 +150,36 @@ export class HostMetrics extends BaseMetrics {
observableResult: api.BatchObservableResult,
networkUsages: NetworkData[]
): void {
const deviceAttr = enums.ATTRIBUTE_NAMES.SYSTEM_DEVICE;
const directionAttr = enums.ATTRIBUTE_NAMES.SYSTEM_NETWORK_DIRECTION;

for (let i = 0, j = networkUsages.length; i < j; i++) {
const networkUsage = networkUsages[i];
observableResult.observe(this._networkDropped, networkUsage.rx_dropped, {
[enums.NETWORK_LABELS.DEVICE]: networkUsage.iface,
direction: enums.NETWORK_LABELS.RECEIVE,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.RECEIVE,
});
observableResult.observe(this._networkDropped, networkUsage.tx_dropped, {
device: networkUsage.iface,
direction: enums.NETWORK_LABELS.TRANSMIT,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.TRANSMIT,
});

observableResult.observe(this._networkErrors, networkUsage.rx_errors, {
device: networkUsage.iface,
direction: enums.NETWORK_LABELS.RECEIVE,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.RECEIVE,
});
observableResult.observe(this._networkErrors, networkUsage.tx_errors, {
device: networkUsage.iface,
direction: enums.NETWORK_LABELS.TRANSMIT,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.TRANSMIT,
});

observableResult.observe(this._networkIo, networkUsage.rx_bytes, {
device: networkUsage.iface,
direction: enums.NETWORK_LABELS.RECEIVE,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.RECEIVE,
});
observableResult.observe(this._networkIo, networkUsage.tx_bytes, {
device: networkUsage.iface,
direction: enums.NETWORK_LABELS.TRANSMIT,
[deviceAttr]: networkUsage.iface,
[directionAttr]: enums.NETWORK_LABELS.TRANSMIT,
});
}
}
Expand Down
43 changes: 31 additions & 12 deletions packages/opentelemetry-host-metrics/src/stats/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import * as os from 'os';
import { CpuUsageData, MemoryData, ProcessCpuUsageData } from '../types';

const MILLISECOND = 1 / 1e3;
let cpuUsageTime: number | undefined = undefined;
const MICROSECOND = 1 / 1e6;

/**
* We get data as soon as we load the module so the 1st collect
Expand Down Expand Up @@ -77,19 +77,38 @@ export function getCpuUsageData(): CpuUsageData[] {
}

/**
* It returns process cpu load delta from last time - to be used with SumObservers.
* When called first time it will return 0 and then delta will be calculated
* We get data as soon as we load the module so the 1st collect
* of the metric already has valuable data to be sent.
*/
let prevProcData: { time: number; usage: NodeJS.CpuUsage } = {
time: Date.now(),
usage: process.cpuUsage(),
};

/**
* Gets the process CPU usage and returns
* - the time spent in `user` state
* - the time spent in `system` state
* - the % of time in `user` state since last measurement
* - the % of time in `system` state since last measurement
*/
export function getProcessCpuUsageData(): ProcessCpuUsageData {
if (typeof cpuUsageTime !== 'number') {
cpuUsageTime = new Date().getTime() - process.uptime() * 1000;
}
const timeElapsed = (new Date().getTime() - cpuUsageTime) / 1000;
const cpuUsage: NodeJS.CpuUsage = process.cpuUsage();
const user = cpuUsage.user * MILLISECOND;
const system = cpuUsage.system * MILLISECOND;
const userP = user / timeElapsed;
const systemP = system / timeElapsed;
const currentTime = Date.now();
const currentUsage = process.cpuUsage();
const prevUsage = prevProcData.usage;
// According to semantic conventions we need to divide by
// - time elapsed (in microseconds to match `process.cpuUsage()` units)
// - number of CPUs
const timeElapsed = (currentTime - prevProcData.time) * 1000;
const cpusTimeElapsed = timeElapsed * prevOsData.cpus.length;

const user = currentUsage.user * MICROSECOND;
const system = currentUsage.system * MICROSECOND;
const userP = (currentUsage.user - prevUsage.user) / cpusTimeElapsed;
const systemP = (currentUsage.system - prevUsage.system) / cpusTimeElapsed;

prevProcData = { time: currentTime, usage: currentUsage };

return {
user,
system,
Expand Down
111 changes: 71 additions & 40 deletions packages/opentelemetry-host-metrics/test/metric.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ import {
import * as assert from 'assert';
import * as os from 'os';
import * as sinon from 'sinon';
import { METRIC_ATTRIBUTES } from '../src/enum';
import { ATTRIBUTE_NAMES } from '../src/enum';
import { HostMetrics } from '../src';

const cpuJson = require('./mocks/cpu.json');
const processJson = require('./mocks/process.json');
const networkJson = require('./mocks/network.json');

class TestMetricReader extends MetricReader {
Expand Down Expand Up @@ -75,7 +76,20 @@ const mockedOS = {
},
};

const INTERVAL = 3000;
const mockedProcess = {
uptime: function () {
return 0;
},
procIdx: 0,
cpuUsage: function () {
return processJson[this.procIdx++ % 2];
},
memoryUsage: {
rss: function () {
return 123456;
},
},
};

describe('Host Metrics', () => {
let meterProvider: MeterProvider;
Expand Down Expand Up @@ -113,24 +127,17 @@ describe('Host Metrics', () => {
sandbox = sinon.createSandbox();
sandbox.useFakeTimers();

sandbox.stub(os, 'freemem').callsFake(() => {
return mockedOS.freemem();
});
sandbox.stub(os, 'totalmem').returns(mockedOS.totalmem());
sandbox.stub(os, 'freemem').callsFake(mockedOS.freemem);
sandbox.stub(os, 'totalmem').callsFake(mockedOS.totalmem);
sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus());
sandbox.stub(process, 'uptime').returns(0);
sandbox.stub(SI, 'networkStats').callsFake(() => {
return mockedSI.networkStats();
});
sandbox.stub(process, 'cpuUsage').callsFake(() => {
return {
user: 90713560,
system: 63192630,
};
});
sandbox.stub(process.memoryUsage, 'rss').callsFake(() => {
return 123456;
});
sandbox.stub(process, 'uptime').callsFake(mockedProcess.uptime);
sandbox
.stub(process, 'cpuUsage')
.callsFake(() => mockedProcess.cpuUsage());
sandbox
.stub(process.memoryUsage, 'rss')
.callsFake(mockedProcess.memoryUsage.rss);
sandbox.stub(SI, 'networkStats').callsFake(mockedSI.networkStats);

reader = new TestMetricReader();

Expand All @@ -144,13 +151,9 @@ describe('Host Metrics', () => {

await hostMetrics.start();

const dateStub = sandbox
.stub(Date.prototype, 'getTime')
.returns(process.uptime() * 1000 + 1);
// Drop first frame cpu metrics, see
// src/common.ts getCpuUsageData
// src/common.ts getCpuUsageData/getProcessCpuUsageData
await reader.collect();
dateStub.returns(process.uptime() * 1000 + INTERVAL);

// advance the clock for the next collection
sandbox.clock.tick(1000);
Expand All @@ -162,8 +165,12 @@ describe('Host Metrics', () => {
sandbox.restore();
});

const sysCpuStateAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_STATE;
const sysCpuNumAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER;
const sysCpuStateAttr = ATTRIBUTE_NAMES.SYSTEM_CPU_STATE;
const sysCpuNumAttr = ATTRIBUTE_NAMES.SYSTEM_CPU_LOGICAL_NUMBER;
const sysMemStateAttr = ATTRIBUTE_NAMES.SYSTEM_MEMORY_STATE;
const sysDeviceAttr = ATTRIBUTE_NAMES.SYSTEM_DEVICE;
const sysNetDirAttr = ATTRIBUTE_NAMES.SYSTEM_NETWORK_DIRECTION;
const procCpuStateAttr = ATTRIBUTE_NAMES.PROCESS_CPU_STATE;

it('should export CPU time metrics', async () => {
const metric = await getRecords(reader, 'system.cpu.time');
Expand Down Expand Up @@ -280,50 +287,74 @@ describe('Host Metrics', () => {
it('should export Memory usage metrics', async () => {
const metric = await getRecords(reader, 'system.memory.usage');

ensureValue(metric, { state: 'used' }, 1024 * 1024 - 1024);
ensureValue(metric, { state: 'free' }, 1024);
ensureValue(metric, { [sysMemStateAttr]: 'used' }, 1024 * 1024 - 1024);
ensureValue(metric, { [sysMemStateAttr]: 'free' }, 1024);
});

it('should export Memory utilization metrics', async () => {
const metric = await getRecords(reader, 'system.memory.utilization');

ensureValue(metric, { state: 'used' }, 0.9990234375);
ensureValue(metric, { state: 'free' }, 0.0009765625);
ensureValue(metric, { [sysMemStateAttr]: 'used' }, 0.9990234375);
ensureValue(metric, { [sysMemStateAttr]: 'free' }, 0.0009765625);
});

it('should export Network io dropped', async () => {
const metric = await getRecords(reader, 'system.network.dropped');

ensureValue(metric, { direction: 'receive', device: 'eth0' }, 1200);
ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 12);
ensureValue(
metric,
{ [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' },
1200
);
ensureValue(
metric,
{ [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' },
12
);
});

it('should export Network io errors', async () => {
const metric = await getRecords(reader, 'system.network.errors');

ensureValue(metric, { direction: 'receive', device: 'eth0' }, 3);
ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 15);
ensureValue(
metric,
{ [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' },
3
);
ensureValue(
metric,
{ [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' },
15
);
});

it('should export Network io bytes', async () => {
const metric = await getRecords(reader, 'system.network.io');

ensureValue(metric, { direction: 'receive', device: 'eth0' }, 123123);
ensureValue(metric, { direction: 'transmit', device: 'eth0' }, 321321);
ensureValue(
metric,
{ [sysNetDirAttr]: 'receive', [sysDeviceAttr]: 'eth0' },
123123
);
ensureValue(
metric,
{ [sysNetDirAttr]: 'transmit', [sysDeviceAttr]: 'eth0' },
321321
);
});

it('should export Process CPU time metrics', async () => {
const metric = await getRecords(reader, 'process.cpu.time');

ensureValue(metric, { state: 'user' }, 90713.56);
ensureValue(metric, { state: 'system' }, 63192.630000000005);
ensureValue(metric, { [procCpuStateAttr]: 'user' }, 90.71356);
ensureValue(metric, { [procCpuStateAttr]: 'system' }, 63.192629999999994);
});

it('should export Process CPU utilization metrics', async () => {
const metric = await getRecords(reader, 'process.cpu.utilization');

ensureValue(metric, { state: 'user' }, 30247.935978659552);
ensureValue(metric, { state: 'system' }, 21071.23374458153);
ensureValue(metric, { [procCpuStateAttr]: 'user' }, 0.025);
ensureValue(metric, { [procCpuStateAttr]: 'system' }, 0.05);
});

it('should export Process Memory usage metrics', async () => {
Expand Down
Loading

0 comments on commit 1e90a40

Please sign in to comment.