Skip to content

Commit

Permalink
Monitor sensor value out of critical thresholds
Browse files Browse the repository at this point in the history
Summary:
Only publish threshold-violation counters for sensors with known critical thresholds.
Otherwise, we would publish them with value 0 (i.e. not violating) when in fact they're violating.

Reviewed By: somasun

Differential Revision: D66833343

fbshipit-source-id: 6167014c863c488f204829e8c2d4e89f32987678
  • Loading branch information
Justin Kim authored and facebook-github-bot committed Dec 9, 2024
1 parent 359c3be commit 2eb003d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
25 changes: 25 additions & 0 deletions fboss/platform/sensor_service/SensorServiceImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,30 @@ DEFINE_int32(
"Interval at which stats subscriptions are served");

namespace facebook::fboss::platform::sensor_service {
namespace {
/**
 * Publishes an fb303 counter indicating whether a sensor reading lies outside
 * its critical thresholds.
 *
 * The counter key is SensorServiceImpl::kCriticalThresholdViolation formatted
 * with the sensor name and the enum name of the sensor type. The counter is
 * set to 1 when the value is strictly above the upper critical threshold or
 * strictly below the lower critical threshold, and to 0 otherwise.
 *
 * Nothing is published when neither critical threshold is defined (so that a
 * sensor with unknown limits is never reported as "not violating"), or when
 * the sensor has no value.
 *
 * @param sensorData Sensor reading together with its thresholds, name and
 *                   sensor type.
 */
void monitorSensorValue(const SensorData& sensorData) {
  const auto& thresholds = *sensorData.thresholds();
  const auto upperCritical = thresholds.upperCriticalVal();
  const auto lowerCritical = thresholds.lowerCriticalVal();

  // Don't monitor if thresholds aren't defined to prevent false data.
  if (!upperCritical.has_value() && !lowerCritical.has_value()) {
    return;
  }
  // Skip reporting if there's no sensor value.
  if (!sensorData.value().has_value()) {
    return;
  }

  // Compare only against thresholds that are actually present. Substituting
  // INT_MAX / INT_MIN for a missing bound would flag readings beyond the int
  // range as violating a threshold that was never configured.
  const auto value = *sensorData.value();
  const bool aboveUpper = upperCritical.has_value() && value > *upperCritical;
  const bool belowLower = lowerCritical.has_value() && value < *lowerCritical;
  const bool thresholdViolation = aboveUpper || belowLower;

  fb303::fbData->setCounter(
      fmt::format(
          SensorServiceImpl::kCriticalThresholdViolation,
          *sensorData.name(),
          apache::thrift::util::enumNameSafe(*sensorData.sensorType())),
      thresholdViolation ? 1 : 0);
}
} // namespace

SensorServiceImpl::SensorServiceImpl(const SensorConfig& sensorConfig)
: sensorConfig_(sensorConfig) {
Expand Down Expand Up @@ -168,6 +192,7 @@ SensorData SensorServiceImpl::fetchSensorDataImpl(
}
sensorData.thresholds() = thresholds ? *thresholds : Thresholds();
sensorData.sensorType() = sensorType;
monitorSensorValue(sensorData);
return sensorData;
}

Expand Down
2 changes: 2 additions & 0 deletions fboss/platform/sensor_service/SensorServiceImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class SensorServiceImpl {
auto static constexpr kReadTotal = "sensor_read.total";
auto static constexpr kTotalReadFailure = "sensor_read.total.failures";
auto static constexpr kHasReadFailure = "sensor_read.has.failures";
// Counter key template for reporting critical-threshold violations per sensor.
// The two `{}` placeholders are filled with fmt::format in
// SensorServiceImpl.cpp: first the sensor name, then the enum name of the
// sensor type.
auto static constexpr kCriticalThresholdViolation =
"sensor_read.sensor_{}.type_{}.critical_threshold_violation";

explicit SensorServiceImpl(const SensorConfig& sensorConfig);
~SensorServiceImpl();
Expand Down

0 comments on commit 2eb003d

Please sign in to comment.