From c3072d02198cfcd9baa8aed4d3db284f7d8e8850 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Wed, 26 Apr 2023 23:35:15 +1000 Subject: [PATCH 1/6] Refactor filename matching logic to separate classes Signed-off-by: Olivier Mehani --- rotate_backups/__init__.py | 173 +++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 56 deletions(-) diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index d144b6b..efd06f2 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -260,6 +260,99 @@ def rotate_backups(directory, rotation_scheme, **options): program.rotate_backups(directory) +class Match: + """An interface-like class for a date match.""" + + def match_to_datetime(self) -> datetime.datetime: + """Return a datetime from the Match.""" + pass + + +class Matcher: + """An interface-like class for a date-matching scheme.""" + + def search(self, location: str, entry: str) -> Match: + """Process an entry in a location and return a Match.""" + pass + + +class FilenameMatch(Match): + """A match based on a filename pattern.""" + + match: re.Match = None + + def __init__(self, match: re.Match): + """Make a Match from a regular expression match.""" + self.match = match + + def match_to_datetime(self) -> datetime.datetime: + """ + Convert the regular expression match to a :class:`~datetime.datetime` value. + + :returns: A :class:`~datetime.datetime` value. + :raises: :exc:`exceptions.ValueError` when a required date component is + not captured by the pattern, the captured value is an empty + string or the captured value cannot be interpreted as a + base-10 integer. + + .. seealso:: :data:`SUPPORTED_DATE_COMPONENTS` + """ + kw = {} + captures = self.match.groupdict() + for component, required in SUPPORTED_DATE_COMPONENTS: + value = captures.get(component) + if value: + kw[component] = int(value, 10) + elif required: + raise ValueError("Missing required date component! (%s)" % component) + else: + kw[component] = 0 + return datetime.datetime(**kw) + + +class FilenameMatcher(Matcher, PropertyManager): + """A date-matching scheme based on filenames.""" + + def search(self, location: str, entry: str) -> FilenameMatch: + """Apply the pattern to the entry's name, and return a Match if found.""" + if match := self.timestamp_pattern.search(entry): + return FilenameMatch(match) + + @mutable_property + def timestamp_pattern(self): + """ + The pattern used to extract timestamps from filenames (defaults to :data:`TIMESTAMP_PATTERN`). + + The value of this property is a compiled regular expression object. + Callers can provide their own compiled regular expression which + makes it possible to customize the compilation flags (see the + :func:`re.compile()` documentation for details). + + The regular expression pattern is expected to be a Python compatible + regular expression that defines the named capture groups 'year', + 'month' and 'day' and optionally 'hour', 'minute' and 'second'. + + String values are automatically coerced to compiled regular expressions + by calling :func:`~humanfriendly.coerce_pattern()`, in this case only + the :data:`re.VERBOSE` flag is used. + + If the caller provides a custom pattern it will be validated + to confirm that the pattern contains named capture groups + corresponding to each of the required date components + defined by :data:`SUPPORTED_DATE_COMPONENTS`. + """ + return TIMESTAMP_PATTERN + + @timestamp_pattern.setter + def timestamp_pattern(self, value): + """Coerce the value of :attr:`timestamp_pattern` to a compiled regular expression.""" + pattern = coerce_pattern(value, re.VERBOSE) + for component, required in SUPPORTED_DATE_COMPONENTS: + if component not in pattern.groupindex and required: + raise ValueError("Pattern is missing required capture group! (%s)" % component) + set_property(self, 'timestamp_pattern', pattern) + + class RotateBackups(PropertyManager): """Python API for the ``rotate-backups`` program.""" @@ -447,39 +540,31 @@ def strict(self): """ return True - @mutable_property + @property def timestamp_pattern(self): - """ - The pattern used to extract timestamps from filenames (defaults to :data:`TIMESTAMP_PATTERN`). - - The value of this property is a compiled regular expression object. - Callers can provide their own compiled regular expression which - makes it possible to customize the compilation flags (see the - :func:`re.compile()` documentation for details). - - The regular expression pattern is expected to be a Python compatible - regular expression that defines the named capture groups 'year', - 'month' and 'day' and optionally 'hour', 'minute' and 'second'. - - String values are automatically coerced to compiled regular expressions - by calling :func:`~humanfriendly.coerce_pattern()`, in this case only - the :data:`re.VERBOSE` flag is used. - - If the caller provides a custom pattern it will be validated - to confirm that the pattern contains named capture groups - corresponding to each of the required date components - defined by :data:`SUPPORTED_DATE_COMPONENTS`. - """ - return TIMESTAMP_PATTERN + """Pattern to use to extract a timestamp from a filename.""" + if not isinstance(self.matcher, FilenameMatcher): + raise ValueError('Current matcher is not a FilenameMatcher') + return self.matcher.timestamp_pattern @timestamp_pattern.setter def timestamp_pattern(self, value): - """Coerce the value of :attr:`timestamp_pattern` to a compiled regular expression.""" - pattern = coerce_pattern(value, re.VERBOSE) - for component, required in SUPPORTED_DATE_COMPONENTS: - if component not in pattern.groupindex and required: - raise ValueError("Pattern is missing required capture group! (%s)" % component) - set_property(self, 'timestamp_pattern', pattern) + """Pattern to use to extract a timestamp from a filename.""" + if not isinstance(self.matcher, FilenameMatcher): + raise ValueError('Current matcher is not a FilenameMatcher') + self.matcher.timestamp_pattern = value + + @cached_property + def matcher(self): + """Matcher to use to extract a timestamp from a file.""" + return FilenameMatcher(timestamp_pattern=TIMESTAMP_PATTERN) + + @matcher.setter + def matcher(self, matcher): + """Matcher to use to extract a timestamp from a file.""" + if not isinstance(matcher, Matcher): + raise ValueError(f'{matcher} is not a Matcher') + set_property(self, '_matcher', matcher) def rotate_concurrent(self, *locations, **kw): """ @@ -642,8 +727,9 @@ def collect_backups(self, location): location = coerce_location(location) logger.info("Scanning %s for backups ..", location) location.ensure_readable(self.force) + for entry in natsort(location.context.list_entries(location.directory)): - match = self.timestamp_pattern.search(entry) + match = self.matcher.search(location, entry) if match: if self.exclude_list and any(fnmatch.fnmatch(entry, p) for p in self.exclude_list): logger.verbose("Excluded %s (it matched the exclude list).", entry) @@ -653,7 +739,7 @@ def collect_backups(self, location): try: backups.append(Backup( pathname=os.path.join(location.directory, entry), - timestamp=self.match_to_datetime(match), + timestamp=match.match_to_datetime(), )) except ValueError as e: logger.notice("Ignoring %s due to invalid date (%s).", entry, e) @@ -663,31 +749,6 @@ def collect_backups(self, location): logger.info("Found %i timestamped backups in %s.", len(backups), location) return sorted(backups) - def match_to_datetime(self, match): - """ - Convert a regular expression match to a :class:`~datetime.datetime` value. - - :param match: A regular expression match object. - :returns: A :class:`~datetime.datetime` value. - :raises: :exc:`exceptions.ValueError` when a required date component is - not captured by the pattern, the captured value is an empty - string or the captured value cannot be interpreted as a - base-10 integer. - - .. seealso:: :data:`SUPPORTED_DATE_COMPONENTS` - """ - kw = {} - captures = match.groupdict() - for component, required in SUPPORTED_DATE_COMPONENTS: - value = captures.get(component) - if value: - kw[component] = int(value, 10) - elif required: - raise ValueError("Missing required date component! (%s)" % component) - else: - kw[component] = 0 - return datetime.datetime(**kw) - def group_backups(self, backups): """ Group backups collected by :func:`collect_backups()` by rotation frequencies. From a46ff14ef8b0cd18e6385dbbc22950334eda99d8 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 27 Apr 2023 00:00:45 +1000 Subject: [PATCH 2/6] Add FilestatMatcher Signed-off-by: Olivier Mehani --- rotate_backups/__init__.py | 42 +++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index efd06f2..905cb3a 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -353,8 +353,41 @@ def timestamp_pattern(self, value): set_property(self, 'timestamp_pattern', pattern) -class RotateBackups(PropertyManager): +class FilestatMatch(Match): + """A match based on a filename statistics.""" + + entry: str = None + field: str = None + + def __init__(self, entry: str, timestamp='mtime'): + """Make a Match from a field's stat attribute.""" + self.entry = entry + self.field = f'st_{timestamp}' + + def match_to_datetime(self) -> datetime.datetime: + """Return a datetime from the file's stat field.""" + return datetime.datetime.fromtimestamp( + getattr(os.stat(self.entry), self.field) + ) + + +class FilestatMatcher(Matcher): + """A date-matching scheme based on file statistics.""" + timestamp: str = None + + def __init__(self, timestamp: str = 'mtime'): + """Make a Matcher based on a file's stat attribute.""" + self.timestamp = timestamp + + def search(self, location: str, entry: str) -> FilestatMatch: + """Return the file's stat attribute.""" + return FilestatMatch( + os.path.join(location.directory, entry), + self.timestamp) + + +class RotateBackups(PropertyManager): """Python API for the ``rotate-backups`` program.""" def __init__(self, rotation_scheme, **options): @@ -371,6 +404,9 @@ def __init__(self, rotation_scheme, **options): """ options.update(rotation_scheme=rotation_scheme) super(RotateBackups, self).__init__(**options) + if self.stat_timestamp: + logger.info("Using file mtime to determine file date") + self.matcher = FilestatMatcher() @mutable_property def config_file(self): @@ -511,6 +547,10 @@ def rotation_scheme(self): in the dictionary. """ + @mutable_property + def stat_timestamp(self): + """Whether to use the files' mtime instead of parsing their name.""" + @mutable_property def strict(self): """ From 6a0e203148fd1a8354bb6bd7b61328446ac424bc Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 27 Apr 2023 00:26:57 +1000 Subject: [PATCH 3/6] Allow to choose the Matcher from the command line Signed-off-by: Olivier Mehani --- rotate_backups/cli.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/rotate_backups/cli.py b/rotate_backups/cli.py index c7746fe..6d89b4a 100644 --- a/rotate_backups/cli.py +++ b/rotate_backups/cli.py @@ -168,6 +168,11 @@ readable and/or writable for the current user (or the user logged in to a remote system over SSH). + -s, --stat-timestamp + + Use mtime stat timestamps, instead of filenames, to determine the + date of each file. + -S, --syslog=CHOICE Explicitly enable or disable system logging instead of letting the program @@ -241,12 +246,12 @@ def main(): selected_locations = [] # Parse the command line arguments. try: - options, arguments = getopt.getopt(sys.argv[1:], 'M:H:d:w:m:y:t:I:x:jpri:c:C:uS:fnvqh', [ + options, arguments = getopt.getopt(sys.argv[1:], 'M:H:d:w:m:y:t:I:x:jpri:c:C:usS:fnvqh', [ 'minutely=', 'hourly=', 'daily=', 'weekly=', 'monthly=', 'yearly=', 'timestamp-pattern=', 'include=', 'exclude=', 'parallel', 'prefer-recent', 'relaxed', 'ionice=', 'config=', - 'removal-command=', 'use-sudo', 'syslog=', 'force', - 'dry-run', 'verbose', 'quiet', 'help', + 'removal-command=', 'use-sudo', 'stat-timestamp', 'syslog=', + 'force', 'dry-run', 'verbose', 'quiet', 'help', ]) for option, value in options: if option in ('-M', '--minutely'): @@ -284,6 +289,8 @@ def main(): kw['removal_command'] = removal_command elif option in ('-u', '--use-sudo'): use_sudo = True + elif option in ('-s', '--stat-timestamp'): + kw['stat_timestamp'] = True elif option in ('-S', '--syslog'): use_syslog = coerce_boolean(value) elif option in ('-f', '--force'): From 0f0fad602dcf88411f6c8d6212c09267677e77e9 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Mon, 29 May 2023 23:54:32 +1000 Subject: [PATCH 4/6] fixup! Support for backup rotation on remote systems --- rotate_backups/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index 905cb3a..81f3d42 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -604,7 +604,7 @@ def matcher(self, matcher): """Matcher to use to extract a timestamp from a file.""" if not isinstance(matcher, Matcher): raise ValueError(f'{matcher} is not a Matcher') - set_property(self, '_matcher', matcher) + set_property(self, 'matcher', matcher) def rotate_concurrent(self, *locations, **kw): """ From c6f61d7b8ee0222b8573f61a674a2e98aba69012 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Mon, 29 May 2023 23:55:35 +1000 Subject: [PATCH 5/6] Add test for --stat-timestamp Signed-off-by: Olivier Mehani --- rotate_backups/tests.py | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/rotate_backups/tests.py b/rotate_backups/tests.py index c3b8078..7d72e5a 100644 --- a/rotate_backups/tests.py +++ b/rotate_backups/tests.py @@ -11,6 +11,7 @@ import datetime import logging import os +import time # External dependencies. from executor import ExternalCommandFailed @@ -21,6 +22,7 @@ # The module we're testing. from rotate_backups import ( RotateBackups, + FilenameMatcher, coerce_location, coerce_retention_period, load_config_file, @@ -217,6 +219,55 @@ def test_rotate_backups(self): backups_that_were_preserved = set(os.listdir(root)) assert backups_that_were_preserved == expected_to_be_preserved + def test_rotate_backups_mtime(self): + """Test the :func:`.rotate_backups()` function.""" + # These are the backups expected to be preserved. After each backup + # I've noted which rotation scheme it falls in and the number of + # preserved backups within that rotation scheme (counting up as we + # progress through the backups sorted by date). + expected_to_be_preserved = set([ + '2013-10-10@20:07', # monthly (1), yearly (1) + '2013-11-01@20:06', # monthly (2) + '2013-12-01@20:07', # monthly (3) + '2014-01-01@20:07', # monthly (4), yearly (2) + '2014-02-01@20:05', # monthly (5) + '2014-03-01@20:04', # monthly (6) + '2014-04-01@20:03', # monthly (7) + '2014-05-01@20:06', # monthly (8) + '2014-06-01@20:01', # monthly (9) + '2014-06-09@20:01', # weekly (1) + '2014-06-16@20:02', # weekly (2) + '2014-06-23@20:04', # weekly (3) + '2014-06-26@20:04', # daily (1) + '2014-06-27@20:02', # daily (2) + '2014-06-28@20:02', # daily (3) + '2014-06-29@20:01', # daily (4) + '2014-06-30@20:03', # daily (5), weekly (4) + '2014-07-01@20:02', # daily (6), monthly (10) + '2014-07-02@20:03', # hourly (1), daily (7) + ]) + with TemporaryDirectory(prefix='rotate-backups-', suffix='-test-suite') as root: + # Specify the rotation scheme and options through a configuration file. + config_file = os.path.join(root, 'rotate-backups.ini') + subdir = os.path.join(root, 'mtime') + parser = configparser.RawConfigParser() + parser.add_section(subdir) + parser.set(subdir, 'hourly', '24') + parser.set(subdir, 'daily', '7') + parser.set(subdir, 'weekly', '4') + parser.set(subdir, 'monthly', '12') + parser.set(subdir, 'yearly', 'always') + parser.set(subdir, 'ionice', 'idle') + with open(config_file, 'w') as handle: + parser.write(handle) + self.create_sample_backup_set(root) + map = self.apply_mtime_and_rename(root, subdir) + run_cli(main, '--verbose', '--config=%s' % config_file, + '--stat-timestamp') + backups_that_were_preserved = set(os.listdir(subdir)) + assert backups_that_were_preserved == set([map[e] + for e in expected_to_be_preserved]) + def test_rotate_concurrent(self): """Test the :func:`.rotate_concurrent()` function.""" # These are the backups expected to be preserved @@ -506,6 +557,21 @@ def create_sample_backup_set(self, root): for name in SAMPLE_BACKUP_SET: os.mkdir(os.path.join(root, name)) + def apply_mtime_and_rename(self, root, subdir): + """Extract mtime from filename, update file stat, rename and return a map of old to new name""" + os.mkdir(subdir) + fm = FilenameMatcher() + map = {} + for name in os.listdir(root): + map[name] = name + if match := fm.search(root, name): + file = os.path.join(root, name) + t = time.mktime(match.match_to_datetime().timetuple()) + os.utime(file, times=(t, t)) + os.rename(file, os.path.join(subdir, f'{t}s')) + map[name] = f'{t}s' + return map + @contextlib.contextmanager def readonly_directory(pathname): From 4b27e701ad1b6d895d7b82b95871716d2961daa0 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Thu, 1 Jun 2023 21:16:45 +1000 Subject: [PATCH 6/6] Allow to pass stat-timestamp in config Signed-off-by: Olivier Mehani --- rotate_backups/__init__.py | 14 +++++++++++--- rotate_backups/tests.py | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index 81f3d42..5d50cb9 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -228,6 +228,8 @@ def load_config_file(configuration_file=None, expand=True): # 'timestamp-pattern' configuration file option has a value set. if items.get('timestamp-pattern'): options['timestamp_pattern'] = items['timestamp-pattern'] + if items.get('stat-timestamp'): + options['stat_timestamp'] = items['stat-timestamp'] # Expand filename patterns? if expand and location.have_wildcards: logger.verbose("Expanding filename pattern %s on %s ..", location.directory, location.context) @@ -404,9 +406,6 @@ def __init__(self, rotation_scheme, **options): """ options.update(rotation_scheme=rotation_scheme) super(RotateBackups, self).__init__(**options) - if self.stat_timestamp: - logger.info("Using file mtime to determine file date") - self.matcher = FilestatMatcher() @mutable_property def config_file(self): @@ -550,6 +549,15 @@ def rotation_scheme(self): @mutable_property def stat_timestamp(self): """Whether to use the files' mtime instead of parsing their name.""" + return isinstance(self.match, FilestatMatcher) + + @stat_timestamp.setter + def stat_timestamp(self, value): + if value: + logger.info("Using file mtime to determine file date") + self.matcher = FilestatMatcher() + elif isinstance(self.match, FilestatMatcher): + del self.matcher @mutable_property def strict(self): diff --git a/rotate_backups/tests.py b/rotate_backups/tests.py index 7d72e5a..9ba7d75 100644 --- a/rotate_backups/tests.py +++ b/rotate_backups/tests.py @@ -258,12 +258,12 @@ def test_rotate_backups_mtime(self): parser.set(subdir, 'monthly', '12') parser.set(subdir, 'yearly', 'always') parser.set(subdir, 'ionice', 'idle') + parser.set(subdir, 'stat-timestamp', 'yes') with open(config_file, 'w') as handle: parser.write(handle) self.create_sample_backup_set(root) map = self.apply_mtime_and_rename(root, subdir) - run_cli(main, '--verbose', '--config=%s' % config_file, - '--stat-timestamp') + run_cli(main, '--verbose', '--config=%s' % config_file) backups_that_were_preserved = set(os.listdir(subdir)) assert backups_that_were_preserved == set([map[e] for e in expected_to_be_preserved])