Skip to content

Commit

Permalink
fix: update windows liveness timeoutSeconds, periodSeconds to 60 and …
Browse files Browse the repository at this point in the history
…reduce tasklist usage to once (#727)

* fix: update windows liveness timeout to 60 seconds

* release notes

* add liveness tasklist fix too

* release note

* one more update

* shorten string
  • Loading branch information
bragi92 authored Jan 26, 2024
1 parent 10b595c commit 94797a1
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 30 deletions.
5 changes: 5 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Azure Monitor Metrics for AKS clusters

### Pending
* Change log -
  * fix: update the Windows liveness probe timeoutSeconds and periodSeconds to 60 and run tasklist only once in the liveness probe


## Release 01-09-2024
* Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.3-main-01-09-2024-a192d342`
* Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.3-main-01-09-2024-a192d342-win`
Expand Down
59 changes: 31 additions & 28 deletions otelcollector/build/windows/scripts/livenessprobe.cmd
Original file line number Diff line number Diff line change
@@ -1,18 +1,38 @@
@echo off
rem Compute the current UTC time as seconds since the Unix epoch and store it
rem in epochTimeNow.
setlocal enableextensions
setlocal enabledelayedexpansion
rem wmic prints the Win32_UTCTime properties as Name=Value lines; findstr keeps
rem only the lines containing an equals sign, and each one is handed to set so
rem that Year, Month, Day, Hour, Minute and Second become variables.
for /f %%x in ('wmic path win32_utctime get /format:list ^| findstr "="') do (
set %%x)
for /f %%x in ('wmic path win32_utctime get /format:list ^| findstr "="') do ( set %%x )
rem Each wmic value is prefixed with the digits 100 and reduced modulo 100
rem (10000 for Year).
rem NOTE(review): presumably this keeps set /a from reading a zero-padded value
rem as octal - confirm.
rem z is 1 for January and February and 0 otherwise; y is Year minus z, so the
rem computational year starts in March.
set /a z=(14-100%Month%%%100)/12, y=10000%Year%%%10000-z
rem ut is the number of days between 1970-01-01 and the current UTC date.
set /a ut=y*365+y/4-y/100+y/400+(153*(100%Month%%%100+12*z-3)+2)/5+Day-719469
rem Convert the day count to seconds and add the time of day.
set /a epochTimeNow=%ut%*86400 + 100%Hour%%%100*3600 + 100%Minute%%%100*60 + 100%Second%%%100

rem Default value; recomputed further down from the container start time.
set /a durationInMinutes = -1

REM Run tasklist once and record which monitored processes appear in its output.
REM Each flag holds the string true or false; the checks further down compare
REM the flags against false.
set "MetricsExtension=false"
set "MonAgentLauncher=false"
set "otelcollector=false"

REM Every row is tested with a delayed-expansion substring substitution instead
REM of piping it through findstr. The substitution removes the searched name
REM from the row, ignoring case, so the row differs from its stripped copy
REM exactly when the name occurs somewhere in it. This keeps the substring
REM semantics of findstr /i while starting no extra process per row, and the
REM row text is never re-parsed by a child command interpreter.
REM Requires delayed expansion, which is enabled at the top of the script.
for /f "tokens=*" %%a in ('tasklist /fo "table"') do (
    set "output=%%a"
    if defined output (
        REM MetricsExtension.Native.exe
        if not "!output:MetricsExtension=!"=="!output!" set "MetricsExtension=true"
        REM MonAgentLauncher.exe
        if not "!output:MonAgentLauncher=!"=="!output!" set "MonAgentLauncher=true"
        REM otelcollector.exe
        if not "!output:otelcollector=!"=="!output!" set "otelcollector=true"
    )
)

if "%MAC%" == "" (
rem Non-MAC mode
tasklist /fi "imagename eq MetricsExtension.Native.exe" /fo "table" | findstr MetricsExtension > nul
if !ERRORLEVEL! NEQ 0 (
if %MetricsExtension%==false (
echo "Metrics Extension is not running (Non-MAC mode)"
goto eof
)
Expand All @@ -29,53 +49,36 @@ if "%MAC%" == "" (
set /a duration=%epochTimeNow%-!azmonContainerStartTime!
set /a durationInMinutes=!duration! / 60
if !durationInMinutes! == 0 (
echo %epochTimeNo% "No configuration present for the AKS resource"
echo %epochTimeNow% "No configuration present for the AKS resource"
)
if !durationInMinutes! GTR 15 (
echo "Greater than 15 mins, No configuration present for the AKS resource"
goto eof
)
)
) else (
tasklist /fi "imagename eq MetricsExtension.Native.exe" /fo "table" | findstr MetricsExtension > nul
if !ERRORLEVEL! NEQ 0 (
if %MetricsExtension%==false (
echo "Metrics Extension is not running (configuration exists)"
goto eof
)
tasklist /fi "imagename eq MonAgentLauncher.exe" /fo "table" | findstr MonAgentLauncher > nul
if !ERRORLEVEL! NEQ 0 (
if %MonAgentLauncher%==false (
echo "MonAgentLauncher is not running (configuration exists)"
goto eof
)
)
)
)

@REM "Checking if fluent-bit is running"
tasklist /fi "imagename eq fluent-bit.exe" /fo "table" | findstr fluent-bit
if %ERRORLEVEL% NEQ 0 (
echo "Fluent-Bit is not running"
exit /B 1
)

@REM "Checking if config map has been updated since agent start"
if exist "C:\opt\microsoft\scripts\filesystemwatcher.txt" (
echo "Config Map Updated or DCR/DCE updated since agent started"
exit /B 1
)

@REM REM "Checking if Telegraf is running"
tasklist /fi "imagename eq telegraf.exe" /fo "table" | findstr telegraf
if %ERRORLEVEL% NEQ 0 (
echo "Telegraf is not running"
exit /B 1
goto eof
)

@REM REM "Checking if otelcollector is running"
tasklist /fi "imagename eq otelcollector.exe" /fo "table" | findstr otelcollector
if %ERRORLEVEL% NEQ 0 (
@REM "Checking if otelcollector is running"
if %otelcollector%==false (
echo "otelcollector is not running"
exit /B 1
goto eof
)

endlocal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,9 @@ spec:
- cmd
- /c
- C:\opt\microsoft\scripts\livenessprobe.cmd
periodSeconds: 15
periodSeconds: 60
initialDelaySeconds: 300
timeoutSeconds: 15
timeoutSeconds: 60
failureThreshold: 3
- name: addon-token-adapter-win
command:
Expand Down

0 comments on commit 94797a1

Please sign in to comment.