Skip to content

Commit

Permalink
fix: host-metrics system.cpu.utilization calculation fix (#1741)
Browse files Browse the repository at this point in the history
* tests: tune the fake clock for testing

* tests: tune the values to expect

* chore: lint autofix

* chore: improve os.cpus mock

* chore: revert implementation for process CPU usage

* chore: rename var

* chore: avoid having useless data upon 1st collection

* chore: remove comments

* chore: change system.cpu attribute names to match semantic conventions

* chore: update test

* chore: lint fix
  • Loading branch information
david-luna authored Nov 7, 2023
1 parent de6156a commit b9350d9
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 82 deletions.
5 changes: 5 additions & 0 deletions packages/opentelemetry-host-metrics/src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ export enum METRIC_NAMES {
PROCESS_MEMORY_USAGE = 'process.memory.usage',
}

/**
 * Attribute keys attached to the system CPU metric observations
 * (CPU time and CPU utilization).
 */
export enum METRIC_ATTRIBUTES {
// Key under which the logical CPU number of the observation is reported.
SYSTEM_CPU_LOGICAL_NUMBER = 'system.cpu.logical_number',
// Key under which the CPU state (one of the CPU_LABELS values) is reported.
SYSTEM_CPU_STATE = 'system.cpu.state',
}

export enum CPU_LABELS {
USER = 'user',
SYSTEM = 'system',
Expand Down
43 changes: 23 additions & 20 deletions packages/opentelemetry-host-metrics/src/metric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,48 +40,51 @@ export class HostMetrics extends BaseMetrics {
observableResult: api.BatchObservableResult,
cpuUsages: CpuUsageData[]
): void {
const stateAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_STATE;
const cpuAttr = enums.METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER;

for (let i = 0, j = cpuUsages.length; i < j; i++) {
const cpuUsage = cpuUsages[i];
observableResult.observe(this._cpuTime, cpuUsage.user, {
state: enums.CPU_LABELS.USER,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.USER,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuTime, cpuUsage.system, {
state: enums.CPU_LABELS.SYSTEM,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.SYSTEM,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuTime, cpuUsage.idle, {
state: enums.CPU_LABELS.IDLE,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.IDLE,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuTime, cpuUsage.interrupt, {
state: enums.CPU_LABELS.INTERRUPT,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.INTERRUPT,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuTime, cpuUsage.nice, {
state: enums.CPU_LABELS.NICE,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.NICE,
[cpuAttr]: cpuUsage.cpuNumber,
});

observableResult.observe(this._cpuUtilization, cpuUsage.userP, {
state: enums.CPU_LABELS.USER,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.USER,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuUtilization, cpuUsage.systemP, {
state: enums.CPU_LABELS.SYSTEM,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.SYSTEM,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuUtilization, cpuUsage.idleP, {
state: enums.CPU_LABELS.IDLE,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.IDLE,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuUtilization, cpuUsage.interruptP, {
state: enums.CPU_LABELS.INTERRUPT,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.INTERRUPT,
[cpuAttr]: cpuUsage.cpuNumber,
});
observableResult.observe(this._cpuUtilization, cpuUsage.niceP, {
state: enums.CPU_LABELS.NICE,
cpu: cpuUsage.cpuNumber,
[stateAttr]: enums.CPU_LABELS.NICE,
[cpuAttr]: cpuUsage.cpuNumber,
});
}
}
Expand Down
49 changes: 32 additions & 17 deletions packages/opentelemetry-host-metrics/src/stats/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,39 @@ const MILLISECOND = 1 / 1e3;
let cpuUsageTime: number | undefined = undefined;

/**
* It returns cpu load delta from last time - to be used with SumObservers.
* When called first time it will return 0 and then delta will be calculated
* CPU data is sampled once when the module loads so that the first metric
* collection already has a previous sample to compute utilization against.
*/
let prevOsData: { time: number; cpus: os.CpuInfo[] } = {
time: Date.now(),
cpus: os.cpus(),
};

/**
* For each CPU returned by `os.cpus()`, it returns:
* - the CPU time spent in each state (user, sys, ...), in seconds
* - the share of the time elapsed since the previous measurement that the CPU
*   spent in each state, expressed as a ratio (e.g. 0.7) rather than a percentage
*/
export function getCpuUsageData(): CpuUsageData[] {
if (typeof cpuUsageTime !== 'number') {
cpuUsageTime = new Date().getTime() - process.uptime() * 1000;
}
const currentTime = Date.now();
const timeElapsed = currentTime - prevOsData.time;
const currentOsData = { time: currentTime, cpus: os.cpus() };

const timeElapsed = (new Date().getTime() - cpuUsageTime) / 1000;
const usageData = currentOsData.cpus.map((cpu, cpuNumber) => {
const prevTimes = prevOsData.cpus[cpuNumber].times;
const currTimes = cpu.times;

return os.cpus().map((cpu, cpuNumber) => {
const idle = cpu.times.idle * MILLISECOND;
const user = cpu.times.user * MILLISECOND;
const system = cpu.times.sys * MILLISECOND;
const interrupt = cpu.times.irq * MILLISECOND;
const nice = cpu.times.nice * MILLISECOND;
const idle = currTimes.idle * MILLISECOND;
const user = currTimes.user * MILLISECOND;
const system = currTimes.sys * MILLISECOND;
const interrupt = currTimes.irq * MILLISECOND;
const nice = currTimes.nice * MILLISECOND;

const idleP = idle / timeElapsed;
const userP = user / timeElapsed;
const systemP = system / timeElapsed;
const interruptP = interrupt / timeElapsed;
const niceP = nice / timeElapsed;
const idleP = (currTimes.idle - prevTimes.idle) / timeElapsed;
const userP = (currTimes.user - prevTimes.user) / timeElapsed;
const systemP = (currTimes.sys - prevTimes.sys) / timeElapsed;
const interruptP = (currTimes.irq - prevTimes.irq) / timeElapsed;
const niceP = (currTimes.nice - prevTimes.nice) / timeElapsed;

return {
cpuNumber: String(cpuNumber),
Expand All @@ -59,6 +70,10 @@ export function getCpuUsageData(): CpuUsageData[] {
niceP,
};
});

prevOsData = currentOsData;

return usageData;
}

/**
Expand Down
137 changes: 114 additions & 23 deletions packages/opentelemetry-host-metrics/test/metric.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
import * as assert from 'assert';
import * as os from 'os';
import * as sinon from 'sinon';
import { METRIC_ATTRIBUTES } from '../src/enum';
import { HostMetrics } from '../src';

const cpuJson = require('./mocks/cpu.json');
Expand Down Expand Up @@ -68,6 +69,10 @@ const mockedOS = {
totalmem: function () {
return 1024 * 1024;
},
cpusIdx: 0,
cpus: function () {
return cpuJson[this.cpusIdx++ % 2];
},
};

const INTERVAL = 3000;
Expand Down Expand Up @@ -112,7 +117,7 @@ describe('Host Metrics', () => {
return mockedOS.freemem();
});
sandbox.stub(os, 'totalmem').returns(mockedOS.totalmem());
sandbox.stub(os, 'cpus').returns(cpuJson);
sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus());
sandbox.stub(process, 'uptime').returns(0);
sandbox.stub(SI, 'networkStats').callsFake(() => {
return mockedSI.networkStats();
Expand Down Expand Up @@ -146,43 +151,129 @@ describe('Host Metrics', () => {
await reader.collect();
dateStub.returns(process.uptime() * 1000 + INTERVAL);

// advance the clock for the next collection
sandbox.clock.tick(1000);

// invalidates throttles
countSI = 0;
});
afterEach(() => {
sandbox.restore();
});

const sysCpuStateAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_STATE;
const sysCpuNumAttr = METRIC_ATTRIBUTES.SYSTEM_CPU_LOGICAL_NUMBER;

it('should export CPU time metrics', async () => {
const metric = await getRecords(reader, 'system.cpu.time');

ensureValue(metric, { state: 'user', cpu: '0' }, 90713.56);
ensureValue(metric, { state: 'system', cpu: '0' }, 63192.630000000005);
ensureValue(metric, { state: 'idle', cpu: '0' }, 374870.7);
ensureValue(metric, { state: 'interrupt', cpu: '0' }, 0);
ensureValue(metric, { state: 'nice', cpu: '0' }, 0);

ensureValue(metric, { state: 'user', cpu: '1' }, 11005.42);
ensureValue(metric, { state: 'system', cpu: '1' }, 7678.12);
ensureValue(metric, { state: 'idle', cpu: '1' }, 510034.8);
ensureValue(metric, { state: 'interrupt', cpu: '1' }, 0);
ensureValue(metric, { state: 'nice', cpu: '1' }, 0);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'user', [sysCpuNumAttr]: '0' },
90714.26
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'system', [sysCpuNumAttr]: '0' },
63192.83
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'idle', [sysCpuNumAttr]: '0' },
374870.8
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'interrupt', [sysCpuNumAttr]: '0' },
0
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'nice', [sysCpuNumAttr]: '0' },
0
);

ensureValue(
metric,
{ [sysCpuStateAttr]: 'user', [sysCpuNumAttr]: '1' },
11005.72
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'system', [sysCpuNumAttr]: '1' },
7678.62
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'idle', [sysCpuNumAttr]: '1' },
510035
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'interrupt', [sysCpuNumAttr]: '1' },
0
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'nice', [sysCpuNumAttr]: '1' },
0
);
});

it('should export CPU utilization metrics', async () => {
const metric = await getRecords(reader, 'system.cpu.utilization');

ensureValue(metric, { state: 'user', cpu: '0' }, 30247.935978659552);
ensureValue(metric, { state: 'system', cpu: '0' }, 21071.23374458153);
ensureValue(metric, { state: 'idle', cpu: '0' }, 124998.56618872957);
ensureValue(metric, { state: 'interrupt', cpu: '0' }, 0);
ensureValue(metric, { state: 'nice', cpu: '0' }, 0);

ensureValue(metric, { state: 'user', cpu: '1' }, 3669.6965655218405);
ensureValue(metric, { state: 'system', cpu: '1' }, 2560.2267422474156);
ensureValue(metric, { state: 'idle', cpu: '1' }, 170068.28942980993);
ensureValue(metric, { state: 'interrupt', cpu: '1' }, 0);
ensureValue(metric, { state: 'nice', cpu: '1' }, 0);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'user', [sysCpuNumAttr]: '0' },
0.7
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'system', [sysCpuNumAttr]: '0' },
0.2
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'idle', [sysCpuNumAttr]: '0' },
0.1
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'interrupt', [sysCpuNumAttr]: '0' },
0
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'nice', [sysCpuNumAttr]: '0' },
0
);

ensureValue(
metric,
{ [sysCpuStateAttr]: 'user', [sysCpuNumAttr]: '1' },
0.3
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'system', [sysCpuNumAttr]: '1' },
0.5
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'idle', [sysCpuNumAttr]: '1' },
0.2
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'interrupt', [sysCpuNumAttr]: '1' },
0
);
ensureValue(
metric,
{ [sysCpuStateAttr]: 'nice', [sysCpuNumAttr]: '1' },
0
);
});

it('should export Memory usage metrics', async () => {
Expand Down
Loading

0 comments on commit b9350d9

Please sign in to comment.