Skip to content

Commit

Permalink
Clean history files (#3282)
Browse files Browse the repository at this point in the history
* Clean history files
---------

Co-authored-by: narrieta@microsoft <narrieta>
  • Loading branch information
narrieta authored Dec 27, 2024
1 parent f047b31 commit 8ad3e96
Show file tree
Hide file tree
Showing 7 changed files with 312 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ExtensionsConfigError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.future import ustr, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentFamily, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, \
format_exception, is_str_none_or_whitespace, is_str_empty, hasattrib
format_exception, is_str_none_or_whitespace, is_str_empty, hasattrib, gettextxml


class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState):
Expand All @@ -38,6 +38,8 @@ def __init__(self, incarnation, xml_text, wire_client):
self._text = xml_text
self._status_upload_blob = None
self._status_upload_blob_type = None
self._status_upload_blob_xml_node = None
self._artifacts_profile_blob_xml_node = None
self._required_features = []
self._on_hold = False
self._activity_id = None
Expand Down Expand Up @@ -81,21 +83,21 @@ def _parse_extensions_config(self, xml_text, wire_client):
if required_features_list is not None:
self._parse_required_features(required_features_list)

self._status_upload_blob = findtext(xml_doc, "StatusUploadBlob")

status_upload_node = find(xml_doc, "StatusUploadBlob")
self._status_upload_blob_type = getattrib(status_upload_node, "statusBlobType")
self._status_upload_blob_xml_node = find(xml_doc, "StatusUploadBlob")
self._status_upload_blob = gettext(self._status_upload_blob_xml_node)
self._status_upload_blob_type = getattrib(self._status_upload_blob_xml_node, "statusBlobType")
logger.verbose("Extension config shows status blob type as [{0}]", self._status_upload_blob_type)

self._on_hold = ExtensionsGoalStateFromExtensionsConfig._fetch_extensions_on_hold(xml_doc, wire_client)
self._artifacts_profile_blob_xml_node = find(xml_doc, "InVMArtifactsProfileBlob")
self._on_hold = ExtensionsGoalStateFromExtensionsConfig._fetch_extensions_on_hold(self._artifacts_profile_blob_xml_node, wire_client)

in_vm_gs_metadata = InVMGoalStateMetaData(find(xml_doc, "InVMGoalStateMetaData"))
self._activity_id = self._string_to_id(in_vm_gs_metadata.activity_id)
self._correlation_id = self._string_to_id(in_vm_gs_metadata.correlation_id)
self._created_on_timestamp = self._ticks_to_utc_timestamp(in_vm_gs_metadata.created_on_ticks)

@staticmethod
def _fetch_extensions_on_hold(xml_doc, wire_client):
def _fetch_extensions_on_hold(artifacts_profile_blob_xml_node, wire_client):
def log_info(message):
logger.info(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=True, log_event=False)
Expand All @@ -104,7 +106,7 @@ def log_warning(message):
logger.warn(message)
add_event(op=WALAEventOperation.ArtifactsProfileBlob, message=message, is_success=False, log_event=False)

artifacts_profile_blob = findtext(xml_doc, "InVMArtifactsProfileBlob")
artifacts_profile_blob = gettext(artifacts_profile_blob_xml_node)
if is_str_none_or_whitespace(artifacts_profile_blob):
log_info("ExtensionsConfig does not include a InVMArtifactsProfileBlob; will assume the VM is not on hold")
return False
Expand Down Expand Up @@ -187,12 +189,30 @@ def extensions(self):
return self._extensions

def get_redacted_text(self):
text = self._text
for ext_handler in self._extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
text = text.replace(extension.protectedSettings, "*** REDACTED ***")
return text
def redact_url(unredacted, xml_node, name):
    """
    Replace the query string of the URL held by xml_node with "***REDACTED***" in unredacted.

    :param unredacted: the full text in which the URL's query string must be masked
    :param xml_node: XML node whose text child holds the URL; may be None
    :param name: label for the node, used only in the error message
    :return: the text with every occurrence of the query string replaced, or the text
             unchanged when the node has no text child or the URL has no query string
    :raises Exception: when the URL has a query string that cannot be found in the text
    """
    text_xml = gettextxml(xml_node)  # Note that we need to redact the raw XML text (which may contain escape sequences)
    if text_xml is None:
        return unredacted
    query = urlparse(text_xml).query
    if not query:
        # No query string to mask. This guard is required: str.replace("", marker) matches at
        # every position and would insert the marker between all characters of the text.
        return unredacted
    redacted = unredacted.replace(query, "***REDACTED***")
    if redacted == unredacted:
        raise Exception('Could not redact {0}'.format(name))
    return redacted

try:
text = self._text
text = redact_url(text, self._status_upload_blob_xml_node, "StatusUploadBlob")
text = redact_url(text, self._artifacts_profile_blob_xml_node, "InVMArtifactsProfileBlob")
for ext_handler in self._extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
original = text
text = text.replace(extension.protectedSettings, "***REDACTED***")
if text == original:
return 'Could not redact protectedSettings for {0}'.format(extension.name)
return text
except Exception as e:
return "Error redacting text: {0}".format(e)

def _parse_required_features(self, required_features_list):
for required_feature in findall(required_features_list, "RequiredFeature"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@
# Requires Python 2.6+ and Openssl 1.0+
import datetime
import json
import re
import sys

from azurelinuxagent.common import logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.future import ustr, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, VmSettingsParseError
from azurelinuxagent.common.protocol.restapi import VMAgentFamily, Extension, ExtensionRequestedState, ExtensionSettings
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
Expand Down Expand Up @@ -143,7 +142,27 @@ def extensions(self):
return self._extensions

def get_redacted_text(self):
return re.sub(r'("protectedSettings"\s*:\s*)"[^"]+"', r'\1"*** REDACTED ***"', self._text)
try:
text = self._text

if self.status_upload_blob is not None:
parsed = urlparse(self.status_upload_blob)
original = text
text = text.replace(parsed.query, "***REDACTED***")
if text == original:
raise Exception('Could not redact the status upload blob')

for ext_handler in self._extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
original = text
text = text.replace(extension.protectedSettings, "***REDACTED***")
if text == original:
return 'Could not redact protectedSettings for {0}'.format(extension.name)

return text
except Exception as e:
return "Error redacting text: {0}".format(e)

def _parse_vm_settings(self, json_text):
vm_settings = _CaseFoldedDict.from_dict(json.loads(json_text))
Expand Down
13 changes: 13 additions & 0 deletions azurelinuxagent/common/utils/textutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ def gettext(node):
return None


def gettextxml(node):
    """
    Return the XML serialization (toxml()) of the first text child of the given node.

    Returns None when node is None or when it has no child of type TEXT_NODE.
    """
    if node is None:
        return None

    text_children = (candidate for candidate in node.childNodes if candidate.nodeType == candidate.TEXT_NODE)
    first_text = next(text_children, None)
    return first_text.toxml() if first_text is not None else None


def findtext(root, tag, namespace=None):
"""
Get text of node by tag and namespace under Node root.
Expand Down
101 changes: 72 additions & 29 deletions tests/common/protocol/test_goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
import glob
import os
import re
import shutil
import time

from azurelinuxagent.common import conf
from azurelinuxagent.common.future import httpclient
from azurelinuxagent.common.future import httpclient, urlparse
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource, GoalStateChannel
from azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config import ExtensionsGoalStateFromExtensionsConfig
from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings
Expand Down Expand Up @@ -162,40 +163,82 @@ def http_get_handler(url, *_, **__):
self._assert_directory_contents(
self._find_history_subdirectory("234-987"), ["VmSettings.json"])

def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self):
with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol:
protocol.mock_wire_data.set_incarnation(888)
protocol.mock_wire_data.set_etag(888)
def test_it_should_redact_extensions_config(self):
data_file = wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE.copy()
data_file["ext_conf"] = "wire/ext_conf_redact.xml"
with mock_wire_protocol(data_file, detect_protocol=False) as protocol:
protocol.mock_wire_data.set_incarnation(888) # set the incarnation to a known value that we can use to find the history directory

goal_state = GoalState(protocol.client, save_to_history=True)

extensions_goal_state = goal_state.extensions_goal_state
protected_settings = []
for ext_handler in extensions_goal_state.extensions:
for extension in ext_handler.settings:
if extension.protectedSettings is not None:
protected_settings.append(extension.protectedSettings)
if goal_state.extensions_goal_state.source != GoalStateSource.Fabric:
raise Exception("The test goal state should be Fabric (it is {0})".format(goal_state.extensions_goal_state.source))

protected_settings = [s.protectedSettings for s in [e.settings[0] for e in goal_state.extensions_goal_state.extensions]]
if len(protected_settings) == 0:
raise Exception("The test goal state does not include any protected settings")

history_directory = self._find_history_subdirectory("888-888")
extensions_config_file = os.path.join(history_directory, "ExtensionsConfig.xml")
vm_settings_file = os.path.join(history_directory, "VmSettings.json")
for file_name in extensions_config_file, vm_settings_file:
with open(file_name, "r") as stream:
file_contents = stream.read()

for settings in protected_settings:
self.assertNotIn(
settings,
file_contents,
"The protectedSettings should not have been saved to {0}".format(file_name))

matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\* REDACTED \*\*\*"', file_contents)
self.assertEqual(
len(matches),
len(protected_settings),
"Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, len(protected_settings), file_contents))
history_directory = self._find_history_subdirectory("888")
extensions_config = os.path.join(history_directory, "ExtensionsConfig.xml")
with open(extensions_config, "r") as f:
history_contents = f.read()

vmap_blob = re.sub(r'(?s)(.*<InVMArtifactsProfileBlob.*>)(.*)(</InVMArtifactsProfileBlob>.*)', r'\2', goal_state.extensions_goal_state._text)
query = urlparse(vmap_blob).query
redacted = vmap_blob.replace(query, "***REDACTED***")
self.assertNotIn(query, history_contents, "The VMAP query string was not redacted from the history")
self.assertNotIn(vmap_blob, history_contents, "The VMAP URL was not redacted in the history")
self.assertIn(redacted, history_contents, "Could not find the redacted VMAP URL in the history")

status_blob = re.sub(r'(?s)(.*<StatusUploadBlob.*>)(.*)(</StatusUploadBlob>.*)', r'\2', goal_state.extensions_goal_state._text)
query = urlparse(status_blob).query
redacted = status_blob.replace(query, "***REDACTED***")
self.assertNotIn(query, history_contents, "The Status query string was not redacted from the history")
self.assertNotIn(status_blob, history_contents, "The Status URL was not redacted in the history")
self.assertIn(redacted, history_contents, "Could not find the redacted Status URL in the history")

for s in protected_settings:
self.assertNotIn(s, history_contents, "The protected settings were not redacted from the history")
matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\*REDACTED\*\*\*"', history_contents)
self.assertEqual(len(matches), len(protected_settings),
"Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(extensions_config, len(protected_settings), history_contents))

def test_it_should_redact_vm_settings(self):
# Checks that the VmSettings.json written to the history directory contains neither the status
# blob's query string nor any extension's protectedSettings, and that both were replaced by the
# "***REDACTED***" marker. The same checks run once for each of the two test data files.
# NOTE: vm_settings-redact_formatted.json is the same as vm_settings-redact.json, but formatted for easier reading
for test_file in ["hostgaplugin/vm_settings-redact.json", "hostgaplugin/vm_settings-redact_formatted.json"]:
# Work on a copy of the shared data-file mapping so the overrides below do not leak into it.
data_file = wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE.copy()
data_file["vm_settings"] = test_file
# The "123" set here matches the "*-123" pattern used below to find the history subdirectory.
data_file["ETag"] = "123"
with mock_wire_protocol(data_file, detect_protocol=False) as protocol:
goal_state = GoalState(protocol.client, save_to_history=True)

# Precondition: fail fast if the goal state did not come from FastTrack.
if goal_state.extensions_goal_state.source != GoalStateSource.FastTrack:
raise Exception("The test goal state should be FastTrack (it is {0}) [test: {1}]".format(goal_state.extensions_goal_state.source, test_file))

# Collects protectedSettings from the first settings entry (settings[0]) of every extension.
protected_settings = [s.protectedSettings for s in [e.settings[0] for e in goal_state.extensions_goal_state.extensions]]
if len(protected_settings) == 0:
raise Exception("The test goal state does not include any protected settings [test: {0}]".format(test_file))

history_directory = self._find_history_subdirectory("*-123")
vm_settings = os.path.join(history_directory, "VmSettings.json")
with open(vm_settings, "r") as f:
history_contents = f.read()

# Build the expected redacted URL by masking only the query part of the status upload blob.
status_blob = goal_state.extensions_goal_state.status_upload_blob
query = urlparse(status_blob).query
redacted = status_blob.replace(query, "***REDACTED***")
self.assertNotIn(query, history_contents, "The Status query string was not redacted from the history [test: {0}]".format(test_file))
self.assertNotIn(status_blob, history_contents, "The Status URL was not redacted in the history [test: {0}]".format(test_file))
self.assertIn(redacted, history_contents, "Could not find the redacted Status URL in the history [test: {0}]".format(test_file))

for s in protected_settings:
self.assertNotIn(s, history_contents, "The protected settings were not redacted from the history [test: {0}]".format(test_file))

# The number of redacted "protectedSettings" entries must equal the number of settings collected above.
matches = re.findall(r'"protectedSettings"\s*:\s*"\*\*\*REDACTED\*\*\*"', history_contents)
self.assertEqual(len(matches), len(protected_settings),
"Could not find the expected number of redacted settings in {0} [test {1}].\nExpected {2}.\n{3}".format(vm_settings, test_file, len(protected_settings), history_contents))

shutil.rmtree(history_directory) # clean up the history directory in-between test cases to avoid stale history files

def test_it_should_save_vm_settings_on_parse_errors(self):
with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol:
Expand Down
Loading

0 comments on commit 8ad3e96

Please sign in to comment.