Skip to content

Commit

Permalink
Merge pull request #3261 from Azure/release-2.12.0.0
Browse files Browse the repository at this point in the history
Release 2.12.0.0 to master
  • Loading branch information
nagworld9 authored Nov 14, 2024
2 parents acd2f73 + b79ceb8 commit 7065e81
Show file tree
Hide file tree
Showing 239 changed files with 8,239 additions and 3,048 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ This will expedite the process of getting your pull request merged and avoid ext
---

### PR information
- [ ] Ensure development PR is based on the `develop` branch.
- [ ] The title of the PR is clear and informative.
- [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For information on cleaning up the commits in your pull request, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).
- [ ] If applicable, the PR references the bug/issue that it fixes in the description.
Expand Down
90 changes: 63 additions & 27 deletions .github/workflows/ci_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ on:

jobs:
test-python-2_6-and-3_4-versions:

strategy:
fail-fast: false
matrix:
include:
- python-version: 2.6
- python-version: 3.4
- python-version: "2.6"
- python-version: "3.4"

name: "Python ${{ matrix.python-version }} Unit Tests"
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Test with nosetests
run: |
if [[ ${{ matrix.python-version }} == 2.6 ]]; then
if [[ ${{ matrix.python-version }} == "2.6" ]]; then
source /home/waagent/virtualenv/python2.6.9/bin/activate
else
source /home/waagent/virtualenv/python3.4.8/bin/activate
Expand Down Expand Up @@ -87,30 +87,23 @@ jobs:
fail-fast: false
matrix:
include:
- python-version: 3.5
PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py"

- python-version: 3.6
PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"

- python-version: 3.7
PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"

- python-version: 3.8
PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e"

- python-version: 3.9
PYLINTOPTS: "--rcfile=ci/3.6.pylintrc"
- python-version: "3.5"
# workaround found in https://github.com/actions/setup-python/issues/866
# for issue "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:728)" on Python 3.5
pip_trusted_host: "pypi.python.org pypi.org files.pythonhosted.org"
- python-version: "3.6"
- python-version: "3.7"
- python-version: "3.8"
- python-version: "3.9"
additional-nose-opts: "--with-coverage --cover-erase --cover-inclusive --cover-branches --cover-package=azurelinuxagent"
- python-version: "3.10"
- python-version: "3.11"

name: "Python ${{ matrix.python-version }} Unit Tests"
runs-on: ubuntu-20.04

env:
PYLINTOPTS: ${{ matrix.PYLINTOPTS }}
PYLINTFILES: "azurelinuxagent setup.py makepkg.py tests tests_e2e"
NOSEOPTS: "--with-timer ${{ matrix.additional-nose-opts }}"
PYTHON_VERSION: ${{ matrix.python-version }}

steps:

Expand All @@ -121,26 +114,69 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
env:
PIP_TRUSTED_HOST: ${{ matrix.pip_trusted_host }}

- name: Install dependencies
id: install-dependencies
run: |
sudo env "PATH=$PATH" python -m pip install --upgrade pip
sudo env "PATH=$PATH" pip install -r requirements.txt
sudo env "PATH=$PATH" pip install -r test-requirements.txt
sudo env "PATH=$PATH" pip install --upgrade pylint
- name: Run pylint
run: |
pylint $PYLINTOPTS --jobs=0 $PYLINTFILES
#
# List of files/directories to be checked by pylint.
# The end-to-end tests run only on Python 3.9 and we lint them only on that version.
#
PYLINT_FILES="azurelinuxagent setup.py makepkg.py tests"
if [[ "${{ matrix.python-version }}" == "3.9" ]]; then
PYLINT_FILES="$PYLINT_FILES tests_e2e"
fi
- name: Test with nosetests
#
# Command-line options for pylint.
# * "unused-private-member" is not implemented on 3.5 and will produce "E0012: Bad option value 'unused-private-member' (bad-option-value)"
# so we suppress "bad-option-value".
# * 3.9 will produce "no-member" for several properties/methods that are added to the mocks used by the unit tests (e.g.
# "E1101: Instance of 'WireProtocol' has no 'aggregate_status' member"), so we suppress that warning.
# * On 3.9 pylint crashes when parsing azurelinuxagent/daemon/main.py (see https://github.com/pylint-dev/pylint/issues/9473), so we ignore it.
# * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.8 and is no longer used by default. It needs
# to be suppressed for previous versions (3.0-3.7), though.
# * 'contextmanager-generator-missing-cleanup' are false positives if yield is used inside an if-else block for contextmanager generator functions.
# (https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/contextmanager-generator-missing-cleanup.html).
# This check is not implemented on versions 3.0-3.7, which report "Bad option value 'contextmanager-generator-missing-cleanup' (bad-option-value)".
# * 3.9-3.11 will produce "too-many-positional-arguments" for several methods that have more than 5 args, so we suppress that warning.
# (R0917: Too many positional arguments (8/5) (too-many-positional-arguments))
PYLINT_OPTIONS="--rcfile=ci/pylintrc --jobs=0"
if [[ "${{ matrix.python-version }}" == "3.9" ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member,too-many-positional-arguments --ignore=main.py"
fi
if [[ "${{ matrix.python-version }}" =~ ^3\.(10|11)$ ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=too-many-positional-arguments"
fi
if [[ "${{ matrix.python-version }}" =~ ^3\.[0-7]$ ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use,bad-option-value"
fi
echo "PYLINT_OPTIONS: $PYLINT_OPTIONS"
echo "PYLINT_FILES: $PYLINT_FILES"
pylint $PYLINT_OPTIONS $PYLINT_FILES
- name: Execute Unit Tests
if: success() || (failure() && steps.install-dependencies.outcome == 'success')
run: |
./ci/nosetests.sh
exit $?
if [[ "${{ matrix.python-version }}" =~ ^3\.[1-9][0-9]+$ ]]; then
./ci/pytest.sh
else
./ci/nosetests.sh
fi
- name: Compile Coverage
if: matrix.python-version == 3.9
if: matrix.python-version == '3.9'
run: |
echo looking for coverage files :
ls -alh | grep -i coverage
Expand All @@ -149,7 +185,7 @@ jobs:
sudo env "PATH=$PATH" coverage report
- name: Upload Coverage
if: matrix.python-version == 3.9
if: matrix.python-version == '3.9'
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Waagent depends on some system packages in order to function properly:
* Filesystem utilities: sfdisk, fdisk, mkfs, parted
* Password tools: chpasswd, sudo
* Text processing tools: sed, grep
* Network tools: ip-route
* Network tools: ip-route, iptables

## Installation

Expand Down Expand Up @@ -568,6 +568,13 @@ OpenSSL commands. This signals OpenSSL to use any installed FIPS-compliant libra
Note that the agent itself has no FIPS-specific code. _If no FIPS-compliant certificates are
installed, then enabling this option will cause all OpenSSL commands to fail._

#### __OS.EnableFirewall__

_Type: Boolean_
_Default: n (set to 'y' in waagent.conf)_

Creates firewall rules that allow only the Agent to communicate with the VM Host.

#### __OS.MonitorDhcpClientRestartPeriod__

_Type: Integer_
Expand Down
75 changes: 47 additions & 28 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@

from __future__ import print_function

import json
import os
import re
import subprocess
import sys
import threading

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi
from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR
from azurelinuxagent.ga.cpucontroller import _CpuController
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.event as event
Expand Down Expand Up @@ -131,7 +135,7 @@ def daemon(self):
"""
set_daemon_version(AGENT_VERSION)
logger.set_prefix("Daemon")
threading.current_thread().setName("Daemon")
threading.current_thread().name = "Daemon"
child_args = None \
if self.conf_file_path is None \
else "-configuration-path:{0}".format(self.conf_file_path)
Expand Down Expand Up @@ -171,7 +175,7 @@ def run_exthandlers(self, debug=False):
Run the update and extension handler
"""
logger.set_prefix("ExtHandler")
threading.current_thread().setName("ExtHandler")
threading.current_thread().name = "ExtHandler"

#
# Agents < 2.2.53 used to echo the log to the console. Since the extension handler could have been started by
Expand Down Expand Up @@ -206,42 +210,57 @@ def collect_logs(self, is_full_mode):

# Check the cgroups unit
log_collector_monitor = None
cgroups_api = SystemdCgroupsApi()
cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self")
tracked_controllers = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
try:
cgroup_api = get_cgroup_api()
except InvalidCgroupMountpointException as e:
log_cgroup_warning("The agent does not support cgroups if the default systemd mountpoint is not being used: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
except CGroupsException as e:
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

if not cpu_slice_matches or not memory_slice_matches:
logger.info("The Log Collector process is not in the proper cgroups:")
if not cpu_slice_matches:
logger.info("\tunexpected cpu slice")
if not memory_slice_matches:
logger.info("\tunexpected memory slice")
log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_controllers = log_collector_cgroup.get_controllers()

if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controller_names()))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

try:
log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
# Running log collector resource monitoring only if agent starts the log collector.
# If the log collector is started by any other means, it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
for controller in tracked_controllers:
if isinstance(controller, _CpuController):
controller.initialize_cpu_usage()
break
log_collector_monitor = get_log_collector_monitor_handler(tracked_controllers)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()

archive, total_uncompressed_size = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "
"and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))

if log_collector_monitor is not None:
log_collector_monitor.stop()
try:
metrics_summary = log_collector_monitor.get_max_recorded_metrics()
metrics_summary['Total Uncompressed File Size (B)'] = total_uncompressed_size
msg = json.dumps(metrics_summary)
logger.info(msg)
event.add_event(op=event.WALAEventOperation.LogCollection, message=msg, log_event=False)
except Exception as e:
msg = "An error occurred while reporting log collector resource usage summary: {0}".format(ustr(e))
logger.warn(msg)
event.add_event(op=event.WALAEventOperation.LogCollection, is_success=False, message=msg, log_event=False)

except Exception as e:
logger.error("Log collection completed unsuccessfully. Error: {0}".format(ustr(e)))
logger.info("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
Expand Down Expand Up @@ -328,7 +347,7 @@ def parse_args(sys_args):
if arg == "":
# Don't parse an empty parameter
continue
m = re.match("^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg) # pylint: disable=W1401
m = re.match(r"^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg)
if not m is None:
conf_file_path = m.group(1)
if not os.path.exists(conf_file_path):
Expand Down
5 changes: 3 additions & 2 deletions azurelinuxagent/common/agent_supported_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,15 @@ def __init__(self):
class _GAVersioningGovernanceFeature(AgentSupportedFeature):
"""
CRP would drive the RSM update if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version.
Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning.
The agent doesn't report the supported feature flag if auto update is disabled, if an old version of the agent that doesn't understand GA versioning is running,
or if support for versioning is explicitly disabled in the agent.
Note: Especially Windows need this flag to report to CRP that GA doesn't support the updates. So linux adopted same flag to have a common solution.
"""

__NAME = SupportedFeatureNames.GAVersioningGovernance
__VERSION = "1.0"
__SUPPORTED = conf.get_auto_update_to_latest_version()
__SUPPORTED = conf.get_auto_update_to_latest_version() and conf.get_enable_ga_versioning()

def __init__(self):
super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME,
Expand Down
27 changes: 23 additions & 4 deletions azurelinuxagent/common/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ConfigurationProvider(object):
"""

def __init__(self):
self.values = dict()
self.values = {}

def load(self, content):
if not content:
Expand Down Expand Up @@ -146,7 +146,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"Debug.CgroupDisableOnQuotaCheckFailure": True,
"Debug.EnableAgentMemoryUsageCheck": False,
"Debug.EnableFastTrack": True,
"Debug.EnableGAVersioning": True
"Debug.EnableGAVersioning": True,
"Debug.EnableCgroupV2ResourceLimiting": False
}


Expand Down Expand Up @@ -200,7 +201,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"Debug.EtpCollectionPeriod": 300,
"Debug.AutoUpdateHotfixFrequency": 14400,
"Debug.AutoUpdateNormalFrequency": 86400,
"Debug.FirewallRulesLogPeriod": 86400
"Debug.FirewallRulesLogPeriod": 86400,
"Debug.LogCollectorInitialDelay": 5 * 60
}


Expand Down Expand Up @@ -670,7 +672,7 @@ def get_enable_ga_versioning(conf=__conf__):
If True, the agent looks for RSM updates (checking the requested version in GS); otherwise it falls back to self-update and finds the highest version from PIR.
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_switch("Debug.EnableGAVersioning", False)
return conf.get_switch("Debug.EnableGAVersioning", True)


def get_firewall_rules_log_period(conf=__conf__):
Expand All @@ -680,3 +682,20 @@ def get_firewall_rules_log_period(conf=__conf__):
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.FirewallRulesLogPeriod", 86400)


def get_enable_cgroup_v2_resource_limiting(conf=__conf__):
    """
    Returns the value of the "Debug.EnableCgroupV2ResourceLimiting" switch, looked up on the given
    configuration with False passed as the default.

    If True, the agent will enable resource monitoring and enforcement for the log collector on
    machines using cgroup v2.

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    option_name = "Debug.EnableCgroupV2ResourceLimiting"
    is_enabled = conf.get_switch(option_name, False)
    return is_enabled


def get_log_collector_initial_delay(conf=__conf__):
    """
    Returns the integer value of "Debug.LogCollectorInitialDelay": the initial delay at service start
    before the first periodic log collection.

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    # Default passed to the lookup; presumably seconds (i.e. five minutes) -- TODO confirm the unit.
    default_delay = 5 * 60
    return conf.get_int("Debug.LogCollectorInitialDelay", default_delay)
Loading

0 comments on commit 7065e81

Please sign in to comment.