diff --git a/charmhelpers/core/unitdata.py b/charmhelpers/core/unitdata.py index d9b8d0b09..a250939b7 100644 --- a/charmhelpers/core/unitdata.py +++ b/charmhelpers/core/unitdata.py @@ -147,6 +147,7 @@ def config_changed(): """ import collections +import collections.abc import contextlib import datetime import itertools @@ -271,12 +272,13 @@ def unsetrange(self, keys=None, prefix=""): 'insert into kv_revisions values (?, ?, ?)', ['%s%%' % prefix, self.revision, json.dumps('DELETED')]) - def set(self, key, value): + def set(self, key, value, delta_revisions=False): """ Set a value in the database. :param str key: Key to set the value for :param value: Any JSON-serializable value to be set + :param delta_revisions: Only keep delta of the changes as revision """ serialized = json.dumps(value) @@ -302,6 +304,21 @@ def set(self, key, value): if not self.revision: return value + serialized_revision = serialized + if delta_revisions: + # Delta revisions is only available for Mapping + # types at the moment. + if isinstance(value, collections.abc.Mapping): + # If the invocation opted in for delta-revisions + # only the delta between current and new will be + # serialized and pushed as a revision. + # This is for keeping a low footprint on disk space + current = None + if exists: + current = json.loads(exists[0]) + delta = self.mapping_delta(value, current) + serialized_revision = json.dumps(delta) + self.cursor.execute( 'select 1 from kv_revisions where key=? and revision=?', [key, self.revision]) @@ -311,7 +328,7 @@ def set(self, key, value): self.cursor.execute( '''insert into kv_revisions ( revision, key, data) values (?, ?, ?)''', - (self.revision, key, serialized)) + (self.revision, key, serialized_revision)) else: self.cursor.execute( ''' @@ -319,39 +336,52 @@ def set(self, key, value): set data = ? where key = ? and revision = ?''', - [serialized, key, self.revision]) + [serialized_revision, key, self.revision]) return value - def delta(self, mapping, prefix): + def mapping_delta(self, current_mapping, previous_mapping): """ - return a delta containing values that have changed. + return the difference between two Mapping objects. """ - previous = self.getrange(prefix, strip=True) - if not previous: - pk = set() - else: - pk = set(previous.keys()) - ck = set(mapping.keys()) delta = DeltaSet() + previous_keys = set() + current_keys = set() + if previous_mapping: + previous_keys = set(previous_mapping.keys()) + else: + previous_mapping = dict() + + if current_mapping: + current_keys = set(current_mapping.keys()) + else: + current_mapping = dict() - # added - for k in ck.difference(pk): - delta[k] = Delta(None, mapping[k]) + # Added + for k in current_keys.difference(previous_keys): + delta[k] = Delta(None, current_mapping[k]) - # removed - for k in pk.difference(ck): - delta[k] = Delta(previous[k], None) + # Removed + for k in previous_keys.difference(current_keys): + delta[k] = Delta(previous_mapping[k], None) - # changed - for k in pk.intersection(ck): - c = mapping[k] - p = previous[k] + # Changed + for k in previous_keys.intersection(current_keys): + c = current_mapping[k] + p = previous_mapping[k] if c != p: delta[k] = Delta(p, c) return delta + def delta(self, mapping, prefix): + """ + return a delta containing values that have changed. + """ + previous = self.getrange(prefix, strip=True) + + return self.mapping_delta(mapping, previous) + @contextlib.contextmanager def hook_scope(self, name=""): """Scope all future interactions to the current hook execution @@ -491,7 +521,25 @@ def _record_hook(self, hookenv): data = hookenv.execution_environment() self.conf = conf_delta = self.kv.delta(data['conf'], 'config') self.rels = rels_delta = self.kv.delta(data['rels'], 'rels') - self.kv.set('env', dict(data['env'])) + + # NOTE(mustafakemalgilor): The environment variable revisions were + # originally kept as a whole. That has caused disk space issues on + # some deployments where the size of environment variable list were + # large. This should not been an issue if the environment variable + # list were stable, but some of the environment variables (e.g. + # JUJU_CONTEXT_ID) are always changing. I've seen some deployments + # with environment variable lists ~70 KiB in size. Considering the + # fact that the record_hook function is being called on each hook + # invocation, it is no surprise that the unit-state database file + # grows with time. To give a more concrete example, even if only + # update-status hook is being called for a charm, that means at + # least 70 KiB of data is being pushed into kv_relations table every + # five minutes which is roughly equivalent to 7 GiB of kv_relations + # history in a year -- not to mention this is valid for every single + # charm. Multiple charms performing this in same environment over + # extended periods of time is enough to fill the storage space quickly. + + self.kv.set('env', dict(data['env']), delta_revisions=True) self.kv.set('unit', data['unit']) self.kv.set('relid', data.get('relid')) return conf_delta, rels_delta diff --git a/tests/core/test_unitdata.py b/tests/core/test_unitdata.py index c6a85ecf5..cfb8d0b2d 100644 --- a/tests/core/test_unitdata.py +++ b/tests/core/test_unitdata.py @@ -17,15 +17,15 @@ from mock import patch -from charmhelpers.core.unitdata import Storage, HookData, kv +from charmhelpers.core.unitdata import Storage, HookData class HookDataTest(unittest.TestCase): - def setUp(self): self.charm_dir = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(self.charm_dir)) self.change_environment(CHARM_DIR=self.charm_dir) + self.kv = Storage() def change_environment(self, **kw): original_env = dict(os.environ) @@ -40,22 +40,67 @@ def cleanup_env(): @patch('charmhelpers.core.hookenv.hook_name') @patch('charmhelpers.core.hookenv.execution_environment') @patch('charmhelpers.core.hookenv.charm_dir') - def test_hook_data_records(self, cdir, ctx, name): + @patch('charmhelpers.core.unitdata.kv') + def test_hook_data_records(self, kv, cdir, ctx, name): + kv.return_value = self.kv + hook_data = HookData() name.return_value = 'config-changed' ctx.return_value = { 'rels': {}, 'conf': {'a': 1}, 'env': {}, 'unit': 'someunit'} cdir.return_value = self.charm_dir with open(os.path.join(self.charm_dir, 'revision'), 'w') as fh: fh.write('1') - hook_data = HookData() with hook_data(): - self.assertEqual(kv(), hook_data.kv) - self.assertEqual(kv().get('charm_revisions'), ['1']) - self.assertEqual(kv().get('unit'), 'someunit') + self.assertEqual(self.kv, hook_data.kv) + self.assertEqual(self.kv.get('charm_revisions'), ['1']) + self.assertEqual(self.kv.get('unit'), 'someunit') self.assertEqual(list(hook_data.conf), ['a']) self.assertEqual(tuple(hook_data.conf.a), (None, 1)) + @patch('charmhelpers.core.hookenv.hook_name') + @patch('charmhelpers.core.hookenv.execution_environment') + @patch('charmhelpers.core.hookenv.charm_dir') + @patch('charmhelpers.core.unitdata.kv') + def test_hook_data_environment(self, kv, cdir, ctx, name): + kv.return_value = self.kv + hook_data = HookData() + name.return_value = 'config-changed' + mock_env = { + "SHELL": "/bin/bash", + "SESSION_MANAGER": "local/workstation:@/tmp/.ICE-unix/8101,unix/workstation:/tmp/.ICE-unix/8101", + "COLORTERM": "truecolor", + "LC_ADDRESS": "tr_TR.UTF-8", + "LC_NAME": "tr_TR.UTF-8", + "DESKTOP_SESSION": "ubuntu", + "LC_MONETARY": "tr_TR.UTF-8", + "PWD": "/tmp", + "XDG_SESSION_DESKTOP": "ubuntu", + "LOGNAME": "user", + "HOME": "/home/user", + "USERNAME": "user", + "LC_PAPER": "tr_TR.UTF-8", + "LANG": "en_US.UTF-8" + } + + mock_env_delta = {} + for key in mock_env: + mock_env_delta[key] = [None, mock_env[key]] + + ctx.return_value = { + 'rels': {}, 'conf': {'a': 2}, 'env': mock_env, 'unit': 'someunit'} + cdir.return_value = self.charm_dir + with open(os.path.join(self.charm_dir, 'revision'), 'w') as fh: + fh.write('2') + + self.maxDiff = 50000 + with hook_data(): + self.assertEqual(self.kv.get('env'), mock_env) + + history = list(self.kv.gethistory('env', deserialize=True)) + self.assertEqual(1, len(history)) + self.assertCountEqual(history[0][2], mock_env_delta) + class StorageTest(unittest.TestCase): @@ -108,6 +153,44 @@ def test_hook_scope(self): [(1, 'a', 1, 'config-changed'), (2, 'a', True, 'start')]) + def test_hook_scope_delta_revisions(self): + kv = Storage(':memory:') + with kv.hook_scope('some-hook') as rev: + self.assertEqual(rev, 1) + kv.set('env', {'ENVVAR1': "DUMMY1", 'ENVVAR2': "DUMMY2"}, delta_revisions=True) + kv.set('env', {'ENVVAR1': "DUMMY1_2", 'ENVVAR3': "DUMMY3"}, delta_revisions=True) + self.assertEqual(kv.get('env'), {'ENVVAR1': "DUMMY1_2", 'ENVVAR3': "DUMMY3"}) + + with kv.hook_scope('some-other-hook') as rev: + self.assertEqual(rev, 2) + kv.set('env', {'ENVVAR3': "DUMMY3"}, delta_revisions=True) + self.assertEqual(kv.get('env'), {'ENVVAR3': "DUMMY3"}) + + history = [h[:-1] for h in kv.gethistory('env', deserialize=True)] + self.assertEqual( + history, + [(1, 'env', {"ENVVAR3": [None, "DUMMY3"], "ENVVAR2": ["DUMMY2", None], "ENVVAR1": ["DUMMY1", "DUMMY1_2"]}, 'some-hook'), + (2, 'env', {"ENVVAR1": ["DUMMY1_2", None]}, 'some-other-hook')]) + + def test_hook_scope_no_delta_revisions(self): + kv = Storage(':memory:') + with kv.hook_scope('some-hook') as rev: + self.assertEqual(rev, 1) + kv.set('env', {'ENVVAR1': "DUMMY1", 'ENVVAR2': "DUMMY2"}, delta_revisions=False) + kv.set('env', {'ENVVAR1': "DUMMY1_2", 'ENVVAR3': "DUMMY3"}, delta_revisions=False) + self.assertEqual(kv.get('env'), {'ENVVAR1': "DUMMY1_2", 'ENVVAR3': "DUMMY3"}) + + with kv.hook_scope('some-other-hook') as rev: + self.assertEqual(rev, 2) + kv.set('env', {'ENVVAR3': "DUMMY3"}, delta_revisions=False) + self.assertEqual(kv.get('env'), {'ENVVAR3': "DUMMY3"}) + + history = [h[:-1] for h in kv.gethistory('env', deserialize=True)] + self.assertEqual( + history, + [(1, 'env', {"ENVVAR3": "DUMMY3", "ENVVAR1": "DUMMY1_2"}, 'some-hook'), + (2, 'env', {"ENVVAR3": "DUMMY3"}, 'some-other-hook')]) + def test_delta_no_previous_and_history(self): kv = Storage(':memory:') with kv.hook_scope('install'): @@ -177,6 +260,29 @@ def test_record(self): else: self.fail('attribute error should fire on nonexistant') + def test_mapping_delta(self): + # Add + kv = Storage(':memory:') + current_1 = {'a': 0, 'c': False} + new_1 = {'a': 0, 'b': "test", 'c': False} + delta_1 = kv.mapping_delta(new_1, current_1) + + self.assertFalse(hasattr(delta_1, 'a')) + self.assertFalse(hasattr(delta_1, 'c')) + self.assertEqual(delta_1.b.previous, None) + self.assertEqual(delta_1.b.current, "test") + + current_2 = {'a': 0, 'c': False} + new_2 = {'a': 1, 'b': "test"} + delta_2 = kv.mapping_delta(new_2, current_2) + + self.assertEqual(delta_2.a.previous, 0) + self.assertEqual(delta_2.a.current, 1) + self.assertEqual(delta_2.b.previous, None) + self.assertEqual(delta_2.b.current, "test") + self.assertEqual(delta_2.c.previous, False) + self.assertEqual(delta_2.c.current, None) + def test_delta(self): kv = Storage(':memory:') kv.update({'a': 1, 'b': 2.2}, prefix="x")