diff --git a/.github/requirements.txt b/.github/requirements.txt index 1911bb1..d240ac8 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,5 +1,6 @@ PyYAML >=5.4 distutils-pytest +lark-parser pytest >=3.6.0 pytest-dependency >=0.2 python-dateutil diff --git a/CHANGES.rst b/CHANGES.rst index 34fc27a..506cca0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ Changelog New features ------------ ++ `#52`_, `#70`_: Add a `backup-tool` script. + + `#54`_: Add command line flags `--directory <dir>` to `archive-tool create`. The script will change into this directory prior to creating the archive if provided. @@ -84,6 +86,7 @@ Internal changes .. _#48: https://github.com/RKrahl/archive-tools/pull/48 .. _#50: https://github.com/RKrahl/archive-tools/issues/50 .. _#51: https://github.com/RKrahl/archive-tools/pull/51 +.. _#52: https://github.com/RKrahl/archive-tools/issues/52 .. _#53: https://github.com/RKrahl/archive-tools/issues/53 .. _#54: https://github.com/RKrahl/archive-tools/pull/54 .. _#55: https://github.com/RKrahl/archive-tools/issues/55 @@ -100,6 +103,7 @@ Internal changes .. _#66: https://github.com/RKrahl/archive-tools/pull/66 .. _#67: https://github.com/RKrahl/archive-tools/pull/67 .. _#68: https://github.com/RKrahl/archive-tools/pull/68 +.. _#70: https://github.com/RKrahl/archive-tools/pull/70 0.5.1 (2020-12-12) diff --git a/MANIFEST.in b/MANIFEST.in index bf5a022..7526730 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,5 +11,6 @@ include tests/data/manifest.yaml include tests/data/msg.txt include tests/data/rnd.dat include tests/data/rnd2.dat +include tests/data/rnd2bis.dat include tests/pytest.ini include tests/test_*.py diff --git a/README.rst b/README.rst index f3fc22c..845217d 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,10 @@ Required library packages: + `PyYAML`_ ++ `lark-parser`_ + + Required for the `backup-tool.py` script. 
+ Optional library packages: + `imapclient`_ @@ -136,6 +140,7 @@ permissions and limitations under the License. .. _PyPI site: https://pypi.org/project/archive-tools/ .. _PyYAML: http://pyyaml.org/wiki/PyYAML +.. _lark-parser: https://github.com/lark-parser/lark .. _imapclient: https://github.com/mjs/imapclient/ .. _python-dateutil: https://dateutil.readthedocs.io/en/stable/ .. _setuptools_scm: https://github.com/pypa/setuptools_scm/ diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py new file mode 100644 index 0000000..d9da441 --- /dev/null +++ b/archive/bt/__init__.py @@ -0,0 +1,49 @@ +"""Internal modules used by the backup-tool command line tool. +""" + +import argparse +import importlib +import logging +import sys +from archive.exception import ArchiveError, ConfigError +from archive.bt.config import Config + +log = logging.getLogger(__name__) +subcmds = ( "create", "index", ) + +def backup_tool(): + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + + argparser = argparse.ArgumentParser() + argparser.add_argument('-v', '--verbose', action='store_true', + help=("verbose diagnostic output")) + subparsers = argparser.add_subparsers(title='subcommands', dest='subcmd') + for sc in subcmds: + m = importlib.import_module('archive.bt.%s' % sc) + m.add_parser(subparsers) + args = argparser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + if not hasattr(args, "func"): + argparser.error("subcommand is required") + + try: + config = Config(args) + except ConfigError as e: + print("%s: configuration error: %s" % (argparser.prog, e), + file=sys.stderr) + sys.exit(2) + + if config.policy: + log.info("%s %s: host:%s, policy:%s", argparser.prog, args.subcmd, + config.host, config.policy) + else: + log.info("%s %s: host:%s", argparser.prog, args.subcmd, config.host) + + try: + sys.exit(args.func(args, config)) + except ArchiveError as e: + print("%s: error: %s" % (argparser.prog, e), + file=sys.stderr) + 
sys.exit(1) diff --git a/archive/bt/config.py b/archive/bt/config.py new file mode 100644 index 0000000..76218c9 --- /dev/null +++ b/archive/bt/config.py @@ -0,0 +1,99 @@ +"""Configuration for the backup-tool command line tool. +""" + +import datetime +import os +from pathlib import Path +import pwd +import socket +from archive.archive import DedupMode +import archive.config +from archive.exception import ConfigError + + +def get_config_file(): + try: + return os.environ['BACKUP_CFG'] + except KeyError: + return "/etc/backup.cfg" + +class Config(archive.config.Config): + + defaults = { + 'dirs': None, + 'excludes': "", + 'backupdir': None, + 'targetdir': "%(backupdir)s", + 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", + 'schedules': None, + 'dedup': 'link', + } + args_options = ('policy', 'user') + + def __init__(self, args): + for o in self.args_options: + if not hasattr(args, o): + setattr(args, o, None) + host = socket.gethostname() + config_file = get_config_file() + if args.user: + args.policy = 'user' + if args.policy: + sections = ("%s/%s" % (host, args.policy), host, args.policy) + else: + sections = (host,) + self.config_file = config_file + super().__init__(args, config_section=sections) + if not self.config_file: + raise ConfigError("configuration file %s not found" % config_file) + self['host'] = host + self['date'] = datetime.date.today().strftime("%y%m%d") + if args.user: + try: + self['home'] = pwd.getpwnam(args.user).pw_dir + except KeyError: + pass + + @property + def host(self): + return self.get('host') + + @property + def policy(self): + return self.get('policy') + + @property + def user(self): + return self.get('user') + + @property + def schedules(self): + return self.get('schedules', required=True, split='/') + + @property + def name(self): + return self.get('name', required=True) + + @property + def dirs(self): + return self.get('dirs', required=True, split=True, type=Path) + + @property + def excludes(self): + return 
self.get('excludes', split=True, type=Path) + + @property + def backupdir(self): + return self.get('backupdir', required=True, type=Path) + + @property + def targetdir(self): + return self.get('targetdir', required=True, type=Path) + + @property + def dedup(self): + return self.get('dedup', required=True, type=DedupMode) + + @property + def path(self): + return self.targetdir / self.name diff --git a/archive/bt/create.py b/archive/bt/create.py new file mode 100644 index 0000000..b3ae931 --- /dev/null +++ b/archive/bt/create.py @@ -0,0 +1,118 @@ +"""Create a backup. +""" + +from collections.abc import Sequence +import datetime +import logging +import os +import pwd +from archive.archive import Archive +from archive.exception import ArchiveCreateError +from archive.index import ArchiveIndex +from archive.manifest import Manifest, DiffStatus, diff_manifest +from archive.tools import tmp_umask +from archive.bt.schedule import ScheduleDate, BaseSchedule, NoFullBackupError + + +log = logging.getLogger(__name__) + +def get_prev_backups(config): + idx_file = config.backupdir / ".index.yaml" + if idx_file.is_file(): + log.debug("reading index file %s", str(idx_file)) + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + else: + log.debug("index file not found") + idx = ArchiveIndex() + idx.sort() + f_d = dict(host=config.host, policy=config.policy) + if config.policy == 'user': + f_d['user'] = config.user + return list(filter(lambda i: i >= f_d, idx)) + +def filter_fileinfos(base, fileinfos): + for stat, fi1, fi2 in diff_manifest(base, fileinfos): + if stat == DiffStatus.MISSING_B or stat == DiffStatus.MATCH: + continue + yield fi2 + +def get_schedule(config): + last_schedule = None + schedules = [] + for s in config.schedules: + try: + n, t = s.split(':') + except ValueError: + n = t = s + cls = BaseSchedule.SubClasses[t] + sd_str = config.get('schedule.%s.date' % n, required=True) + last_schedule = cls(n, ScheduleDate(sd_str), last_schedule) + 
schedules.append(last_schedule) + now = datetime.datetime.now() + for s in schedules: + if s.match_date(now): + return s + else: + log.debug("no schedule date matches now") + return None + +def get_fileinfos(config, schedule): + fileinfos = Manifest(paths=config.dirs, excludes=config.excludes) + try: + base_archives = schedule.get_base_archives(get_prev_backups(config)) + except NoFullBackupError: + raise ArchiveCreateError("No previous full backup found, can not " + "create %s archive" % schedule.name) + for p in [i.path for i in base_archives]: + log.debug("considering %s to create differential archive", p) + with Archive().open(p) as base: + fileinfos = filter_fileinfos(base.manifest, fileinfos) + return fileinfos + +def chown(path, user): + try: + pw = pwd.getpwnam(user) + except KeyError: + log.warn("User %s not found in password database", user) + return + try: + os.chown(path, pw.pw_uid, pw.pw_gid) + except OSError as e: + log.error("chown %s: %s: %s", path, type(e).__name__, e) + +def create(args, config): + schedule = get_schedule(config) + if schedule is None: + return 0 + config['schedule'] = schedule.name + fileinfos = get_fileinfos(config, schedule) + if not isinstance(fileinfos, Sequence): + fileinfos = list(fileinfos) + if not fileinfos: + log.debug("nothing to archive") + return 0 + + log.debug("creating archive %s", config.path) + + tags = [ + "host:%s" % config.host, + "policy:%s" % config.policy, + "schedule:%s" % schedule.name, + "type:%s" % schedule.ClsName, + ] + if config.user: + tags.append("user:%s" % config.user) + with tmp_umask(0o277): + arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags, + dedup=config.dedup) + if config.user: + chown(arch.path, config.user) + return 0 + +def add_parser(subparsers): + parser = subparsers.add_parser('create', help="create a backup") + clsgrp = parser.add_mutually_exclusive_group() + clsgrp.add_argument('--policy', default='sys') + clsgrp.add_argument('--user') + 
parser.set_defaults(func=create) diff --git a/archive/bt/index.py b/archive/bt/index.py new file mode 100644 index 0000000..47ea60a --- /dev/null +++ b/archive/bt/index.py @@ -0,0 +1,29 @@ +"""Update the index of backups. +""" + +import logging +from archive.index import ArchiveIndex + + +log = logging.getLogger(__name__) + +def update_index(args, config): + idx_file = config.backupdir / ".index.yaml" + if idx_file.is_file(): + log.debug("reading index file %s", str(idx_file)) + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + else: + log.debug("index file not found") + idx = ArchiveIndex() + idx.add_archives(config.backupdir.glob("*.tar*"), prune=args.prune) + idx.sort() + with idx_file.open("wb") as f: + idx.write(f) + return 0 + +def add_parser(subparsers): + parser = subparsers.add_parser('index', help="update backup index") + parser.add_argument('--no-prune', action='store_false', dest='prune', + help="do not remove missing backups from the index") + parser.set_defaults(func=update_index) diff --git a/archive/bt/schedule.py b/archive/bt/schedule.py new file mode 100644 index 0000000..1cbb442 --- /dev/null +++ b/archive/bt/schedule.py @@ -0,0 +1,266 @@ +"""Provide helper for the backup-tool related to schedules. +""" + +import collections +import datetime +from enum import IntEnum +import re +from lark import Lark, Transformer + + +class NoFullBackupError(Exception): + pass + + +class _DTMatcher: + """datetime component matcher to be used in ScheduleDate. + This is an abstract base class. 
+ """ + def matches(self, value): + raise NotImplementedError + +class _DTMatcherAny(_DTMatcher): + + def matches(self, value): + return True + + def __str__(self): + return '*' + +class _DTMatcherValue(_DTMatcher): + + def __init__(self, value): + assert isinstance(value, int) + self.value = value + + def matches(self, value): + return value == self.value + + def __str__(self): + return '%d' % self.value + +class _DTMatcherInterval(_DTMatcher): + + def __init__(self, i_min, i_max): + assert isinstance(i_min, int) + assert isinstance(i_max, int) + self.i_min = i_min + self.i_max = i_max + + def matches(self, value): + return self.i_min <= value <= self.i_max + + def __str__(self): + return '[%d,%d]' % (self.i_min, self.i_max) + +class _DTMatcherList(_DTMatcher): + + def __init__(self, dtms): + self.dtms = dtms + + def matches(self, value): + for dtm in self.dtms: + if dtm.matches(value): + return True + else: + return False + + def __str__(self): + return '(%s)' % ",".join(str(m) for m in self.dtms) + +_wd = dict(Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6, Sun=7) + +_dt_fields = ('weekday', 'year', 'month', 'day', 'hour', 'minute', 'second') +_dt_tuple = collections.namedtuple('_dt_tuple', _dt_fields) + + +_sd_grammar = r""" + sd: [weekday _WS] date [_WS time] + + weekday: wd ("," wd)* -> vlist + + wd: wdstr -> value + | wdstr ".." wdstr -> intervall + + wdstr: MON | TUE | WED | THU | FRI | SAT | SUN + + date: [[dtc "-"] dtc "-"] dtc + + time: dtc ":" dtc [":" dtc] + + dtc: dtcs ("," dtcs)* -> vlist + + dtcs: "*" -> wildcard + | INT -> value + | INT ".." 
INT -> intervall + + MON: "Mon" + TUE: "Tue" + WED: "Wed" + THU: "Thu" + FRI: "Fri" + SAT: "Sat" + SUN: "Sun" + + _WS: (" "|/\t/)+ + + %import common.INT +""" + +class _SDTf(Transformer): + + def wdstr(self, l): + (s,) = l + return _wd[s] + + def wildcard(self, l): + return _DTMatcherAny() + + def value(self, l): + (v,) = l + return _DTMatcherValue(int(v)) + + def intervall(self, l): + (a, b) = l + return _DTMatcherInterval(int(a), int(b)) + + def vlist(self, l): + if len(l) == 1: + return l[0] + else: + return _DTMatcherList(l) + + def date(self, l): + l = list(l) + while len(l) < 3: + l.insert(0, _DTMatcherAny()) + return l + + def time(self, l): + l = list(l) + while len(l) < 3: + l.append(_DTMatcherAny()) + return l + + def sd(self, l): + l = list(l) + r = [] + # weekday + if isinstance(l[0], _DTMatcher): + r.append(l.pop(0)) + else: + r.append(_DTMatcherAny()) + # date + r.extend(l.pop(0)) + # time + if l: + r.extend(l.pop(0)) + else: + r.extend((_DTMatcherAny(), _DTMatcherAny(), _DTMatcherAny())) + return r + +_sd_parser = Lark(_sd_grammar, + start='sd', parser='lalr', transformer=_SDTf(), + maybe_placeholders=False) + +class ScheduleDate(_dt_tuple): + + def __new__(cls, spec): + l = _sd_parser.parse(spec) + return super().__new__(cls, *l) + + def __contains__(self, dt): + if isinstance(dt, datetime.datetime): + return (self.weekday.matches(dt.isoweekday()) and + self.year.matches(dt.year) and + self.month.matches(dt.month) and + self.day.matches(dt.day) and + self.hour.matches(dt.hour) and + self.minute.matches(dt.minute) and + self.second.matches(dt.second)) + else: + return False + + +class BaseSchedule: + """Abstract base class for schedules. 
+ """ + + SubClasses = dict() + ClsName = None + + def __init__(self, name, date, parent): + self.name = name + self.date = date + self.parent = parent + + def match_date(self, dt): + return dt in self.date + + def get_base_archives(self, archives): + raise NotImplementedError + + def get_child_base_archives(self, archives): + raise NotImplementedError + + @classmethod + def register_clsname(cls, subcls): + """A class decorator to register the name for a subclass. + """ + assert issubclass(subcls, cls) + assert subcls.ClsName and subcls.ClsName not in cls.SubClasses + cls.SubClasses[subcls.ClsName] = subcls + return subcls + +@BaseSchedule.register_clsname +class FullSchedule(BaseSchedule): + + ClsName = "full" + + def get_base_archives(self, archives): + return [] + + def get_child_base_archives(self, archives): + last_full = None + for i in archives: + if i.schedule == self.name: + last_full = i + if last_full: + return [last_full] + else: + raise NoFullBackupError + +@BaseSchedule.register_clsname +class CumuSchedule(BaseSchedule): + + ClsName = "cumu" + + def get_base_archives(self, archives): + return self.parent.get_child_base_archives(archives) + + def get_child_base_archives(self, archives): + base_archives = self.parent.get_child_base_archives(archives) + p_idx = archives.index(base_archives[-1]) + last_cumu = None + for i in archives[p_idx+1:]: + if i.schedule == self.name: + last_cumu = i + if last_cumu: + base_archives.append(last_cumu) + return base_archives + +@BaseSchedule.register_clsname +class IncrSchedule(BaseSchedule): + + ClsName = "incr" + + def get_base_archives(self, archives): + base_archives = self.parent.get_child_base_archives(archives) + p_idx = archives.index(base_archives[-1]) + for i in archives[p_idx+1:]: + if i.schedule == self.name: + base_archives.append(i) + return base_archives + + def get_child_base_archives(self, archives): + return self.get_base_archives(archives) diff --git a/archive/index.py b/archive/index.py index 
11521a1..9a4f485 100644 --- a/archive/index.py +++ b/archive/index.py @@ -19,6 +19,7 @@ def __init__(self, data=None, archive=None): self.policy = data.get('policy') self.user = data.get('user') self.schedule = data.get('schedule') + self.type = data.get('type') elif archive is not None: self.date = parse_date(archive.manifest.head['Date']) self.path = archive.path @@ -38,6 +39,7 @@ def __init__(self, data=None, archive=None): self.policy = tagmap.get('policy') self.user = tagmap.get('user') self.schedule = tagmap.get('schedule') + self.type = tagmap.get('type') else: raise TypeError("Either data or archive must be provided") @@ -48,7 +50,7 @@ def as_dict(self): 'date': self.date.isoformat(sep=' '), 'path': str(self.path), } - for k in ('host', 'policy', 'user', 'schedule'): + for k in ('host', 'policy', 'user', 'schedule', 'type'): v = getattr(self, k, None) if v: d[k] = v diff --git a/etc/backup.cfg b/etc/backup.cfg new file mode 100644 index 0000000..2f59d97 --- /dev/null +++ b/etc/backup.cfg @@ -0,0 +1,54 @@ +# Configuration file for backup-tool. + +# backup-tool tries to read the following configuration sections in +# order: [/], [], [], where is the +# hostname of the local machine and is the argument to the +# --policy command line option. The default policy is "sys". For +# each configuration option, the first occurrence in any of these +# sections will be used. + +# Default settings that are effectively included in all other sections. +[DEFAULT] +! backupdir = /proj/backup/auto + +# The default policy sys +# In this example, we schedule a monthly full backup for the Monday +# after the first Sunday of the month and a weekly incremental backup +# each other Monday. +[sys] +! dirs = +! /etc +! /root +! /usr/local +! excludes = +! /root/.cache +! schedules = full/incr +! schedule.full.date = Mon *-*-2..8 +! schedule.incr.date = Mon * + +# The special policy user is used when the --user command line option +# is used. 
+# In this example, we schedule a monthly full backup for the Monday +# after the first Sunday of the month, a weekly cumulative backup each +# other Monday and a daily incremental backup for any other day. +[user] +! name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +! dirs = %(home)s +! excludes = +! %(home)s/.cache +! %(home)s/.thumbnails +! %(home)s/tmp +! schedules = full/cumu/incr +! schedule.full.date = Mon *-*-2..8 +! schedule.cumu.date = Mon * +! schedule.incr.date = * + +# Override settings on a particular host +! [db-host] +! backupdir = /proj/db +! +! [db-host/sys] +! dirs = +! /etc +! /root +! /var/lib/mysql diff --git a/python-archive-tools.spec b/python-archive-tools.spec index 5afa5e7..ab503e0 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -13,11 +13,13 @@ BuildRequires: fdupes BuildRequires: python3-base >= 3.6 %if %{with tests} BuildRequires: python3-PyYAML +BuildRequires: python3-lark-parser BuildRequires: python3-distutils-pytest BuildRequires: python3-pytest-dependency >= 0.2 BuildRequires: python3-pytest >= 3.0 %endif Requires: python3-PyYAML +Requires: python3-lark-parser Recommends: python3-IMAPClient Recommends: python3-python-dateutil BuildArch: noarch @@ -53,6 +55,7 @@ python3 setup.py test %files %defattr(-,root,root) %doc README.rst +%config(noreplace) %{_sysconfdir}/backup.cfg %{python3_sitelib}/* %{_bindir}/* diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py new file mode 100644 index 0000000..e6cf120 --- /dev/null +++ b/scripts/backup-tool.py @@ -0,0 +1,7 @@ +#! /usr/bin/python +"""Create a backup. 
+""" + +import archive.bt + +archive.bt.backup_tool() diff --git a/setup.py b/setup.py index c9925ff..f43cd72 100644 --- a/setup.py +++ b/setup.py @@ -117,9 +117,11 @@ def run(self): author_email = "rolf@rotkraut.de", url = "https://github.com/RKrahl/archive-tools", license = "Apache-2.0", - requires = ["yaml"], - packages = ["archive", "archive.cli"], - scripts = ["scripts/archive-tool.py", "scripts/imap-to-archive.py"], + requires = ["yaml", "lark"], + packages = ["archive", "archive.cli", "archive.bt"], + scripts = ["scripts/archive-tool.py", "scripts/backup-tool.py", + "scripts/imap-to-archive.py"], + data_files = [("/etc", ["etc/backup.cfg"])], classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: System Administrators", diff --git a/tests/conftest.py b/tests/conftest.py index 5778fc5..9cc2697 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ """pytest configuration. """ +import datetime import hashlib import os from pathlib import Path @@ -15,7 +16,8 @@ __all__ = [ - 'DataDir', 'DataFile', 'DataRandomFile', 'DataSymLink', + 'FrozenDateTime', 'FrozenDate', 'MockFunction', + 'DataDir', 'DataFile', 'DataContentFile', 'DataRandomFile', 'DataSymLink', 'absflag', 'archive_name', 'callscript', 'check_manifest', 'get_output', 'gettestdata', 'require_compression', 'setup_testdata', 'sub_testdata', @@ -55,6 +57,39 @@ def require_compression(compression): except ImportError: pytest.skip(msg % ("lzma", "xz")) +class FrozenDateTime(datetime.datetime): + _frozen = datetime.datetime.now() + + @classmethod + def freeze(cls, dt): + cls._frozen = dt + + @classmethod + def now(cls, tz=None): + return cls._frozen + +class FrozenDate(datetime.date): + + @classmethod + def today(cls): + return FrozenDateTime.now().date() + +class MockFunction: + """A function returning a preset value. + + May be used to mock library functions, such as pwd.getpwnam() or + socket.gethostname(). 
+ """ + + def __init__(self, value=None): + self.set_return_value(value) + + def set_return_value(self, value): + self._value = value + + def __call__(self, *args): + return self._value + class TmpDir(object): """Provide a temporary directory. """ @@ -117,6 +152,7 @@ def _set_fs_attrs(path, mode, mtime): path.chmod(mode) if mtime is not None: os.utime(path, (mtime, mtime), follow_symlinks=False) + os.utime(path.parent, (mtime, mtime), follow_symlinks=False) class DataItem: @@ -139,6 +175,12 @@ def st_mode(self): def create(self, main_dir): raise NotImplementedError + def unlink(self, main_dir, mtime=None): + path = main_dir / self.path + path.unlink() + if mtime: + os.utime(path.parent, (mtime, mtime), follow_symlinks=False) + class DataFileOrDir(DataItem): def __init__(self, path, mode, *, mtime=None): @@ -149,6 +191,22 @@ def __init__(self, path, mode, *, mtime=None): def mode(self): return self._mode + @mode.setter + def mode(self, mode): + self._mode = mode + +class DataFileBase(DataFileOrDir): + + Checksums = _get_checksums() + + @property + def type(self): + return 'f' + + @property + def checksum(self): + return self._checksum or self.Checksums[self.path.name] + class DataDir(DataFileOrDir): @property @@ -160,53 +218,40 @@ def create(self, main_dir): path.mkdir(parents=True, exist_ok=True) _set_fs_attrs(path, self.mode, self.mtime) -class DataFile(DataFileOrDir): - - Checksums = _get_checksums() +class DataFile(DataFileBase): def __init__(self, path, mode, *, mtime=None, checksum=None): super().__init__(path, mode, mtime=mtime) self._checksum = checksum - @property - def type(self): - return 'f' - - @property - def checksum(self): - return self._checksum or self.Checksums[self.path.name] - def create(self, main_dir): path = main_dir / self.path path.parent.mkdir(parents=True, exist_ok=True) shutil.copy(gettestdata(self.path.name), path) _set_fs_attrs(path, self.mode, self.mtime) -class DataRandomFile(DataFileOrDir): +class DataContentFile(DataFileBase): - 
def __init__(self, path, mode, *, mtime=None, size=1024): + def __init__(self, path, data, mode, *, mtime=None): super().__init__(path, mode, mtime=mtime) - self._size = size - - @property - def type(self): - return 'f' - - @property - def checksum(self): - return self._checksum + self.data = data def create(self, main_dir): path = main_dir / self.path h = hashlib.new("sha256") - data = bytearray(getrandbits(8) for _ in range(self._size)) - h.update(data) + h.update(self.data) self._checksum = h.hexdigest() path.parent.mkdir(parents=True, exist_ok=True) with path.open("wb") as f: - f.write(data) + f.write(self.data) _set_fs_attrs(path, self.mode, self.mtime) +class DataRandomFile(DataContentFile): + + def __init__(self, path, mode, *, mtime=None, size=1024): + data = bytearray(getrandbits(8) for _ in range(size)) + super().__init__(path, data, mode, mtime=mtime) + class DataSymLink(DataItem): def __init__(self, path, target, *, mtime=None): diff --git a/tests/data/.sha256 b/tests/data/.sha256 index 1a1d202..ba12ce9 100644 --- a/tests/data/.sha256 +++ b/tests/data/.sha256 @@ -1,3 +1,4 @@ b22b009134622b6508d756f1062455d71a7026594eacb0badf81f4f677929ebe msg.txt 21bad91c29230c3b1da568d4f2ccc77f6d79c0ea91ac6a40d37b2b15a2932bea rnd.dat 2d65300e0b6b56d4e50812a962b4a01db8d3a6ac96396a2d92fe59a13b286ee8 rnd2.dat +9de77792007068fa67fa063180ae970c1b7d93b80a8848a7524e4a500effafc0 rnd2bis.dat diff --git a/tests/data/rnd2bis.dat b/tests/data/rnd2bis.dat new file mode 100644 index 0000000..caa9015 Binary files /dev/null and b/tests/data/rnd2bis.dat differ diff --git a/tests/test_05_schedule.py b/tests/test_05_schedule.py new file mode 100644 index 0000000..47491da --- /dev/null +++ b/tests/test_05_schedule.py @@ -0,0 +1,121 @@ +"""Test class archive.bt.schedule.ScheduleDate. 
+""" + +import datetime +import pytest +from archive.bt.schedule import ScheduleDate + + +test_schedules = [ + { + 'schedule' : "Sat,Thu,Mon..Wed,Sat..Sun *", + 'dates': [ + ( datetime.datetime(2021, 7, 1, 5, 13, 21), True ), + ( datetime.datetime(2021, 7, 2, 6, 24, 36), False ), + ( datetime.datetime(2021, 7, 3, 3, 57, 42), True ), + ( datetime.datetime(2021, 7, 4, 8, 8, 48), True ), + ( datetime.datetime(2021, 7, 5, 19, 50, 14), True ), + ( datetime.datetime(2021, 7, 6, 22, 48, 56), True ), + ( datetime.datetime(2021, 7, 7, 1, 11, 49), True ), + ], + }, + { + 'schedule' : "Mon,Sun 2012-*-* 2,1:23", + 'dates': [ + ( datetime.datetime(2012, 10, 20, 1, 23, 48), False ), + ( datetime.datetime(2012, 10, 21, 1, 23, 7), True ), + ( datetime.datetime(2012, 10, 21, 2, 24, 30), False ), + ( datetime.datetime(2012, 10, 21, 3, 23, 26), False ), + ( datetime.datetime(2012, 10, 22, 1, 23, 39), True ), + ], + }, + { + 'schedule' : "Wed *-1", + 'dates': [ + ( datetime.datetime(2002, 4, 1, 13, 52, 43), False ), + ( datetime.datetime(2002, 5, 1, 17, 11, 44), True ), + ( datetime.datetime(2002, 6, 1, 2, 11, 24), False ), + ( datetime.datetime(2003, 9, 1, 6, 5, 23), False ), + ( datetime.datetime(2003, 9, 3, 2, 37, 36), False ), + ( datetime.datetime(2003, 10, 1, 15, 30, 6), True ), + ( datetime.datetime(2003, 11, 1, 20, 29, 54), False ), + ], + }, + { + 'schedule' : "Wed..Wed,Wed *-1", + 'dates': [ + ( datetime.datetime(2002, 4, 1, 13, 52, 43), False ), + ( datetime.datetime(2002, 5, 1, 17, 11, 44), True ), + ( datetime.datetime(2002, 6, 1, 2, 11, 24), False ), + ( datetime.datetime(2003, 9, 1, 6, 5, 23), False ), + ( datetime.datetime(2003, 9, 3, 2, 37, 36), False ), + ( datetime.datetime(2003, 10, 1, 15, 30, 6), True ), + ( datetime.datetime(2003, 11, 1, 20, 29, 54), False ), + ], + }, + { + 'schedule' : "10-15", + 'dates': [ + ( datetime.datetime(2017, 9, 15, 3, 8, 17), False ), + ( datetime.datetime(2017, 10, 14, 23, 48, 51), False ), + ( datetime.datetime(2017, 10, 15, 4, 12, 
36), True ), + ( datetime.datetime(2018, 10, 15, 11, 14, 43), True ), + ], + }, + { + 'schedule' : "Fri 1..7 4,10,16,22:30", + 'dates': [ + ( datetime.datetime(2021, 7, 1, 4, 30, 45), False ), + ( datetime.datetime(2021, 7, 2, 4, 30, 45), True ), + ( datetime.datetime(2021, 7, 2, 5, 30, 45), False ), + ( datetime.datetime(2021, 7, 2, 16, 30, 45), True ), + ( datetime.datetime(2021, 7, 9, 16, 30, 45), False ), + ], + }, + { + 'schedule' : "Mon *-*-2..8", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), False ), + ( datetime.datetime(2021, 3, 5, 3, 0), False ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), False ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), False ), + ( datetime.datetime(2021, 7, 12, 3, 0), False ), + ], + }, + { + 'schedule' : "Mon *", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), True ), + ( datetime.datetime(2021, 3, 5, 3, 0), False ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), True ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), False ), + ( datetime.datetime(2021, 7, 12, 3, 0), True ), + ], + }, + { + 'schedule' : "*", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), True ), + ( datetime.datetime(2021, 3, 5, 3, 0), True ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), True ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), True ), + ( datetime.datetime(2021, 7, 12, 3, 0), True ), + ], + }, +] + +@pytest.mark.parametrize("schedule,dates", [ + (s['schedule'], s['dates']) for s in test_schedules +]) +def test_schedule_parse(schedule, dates): + """Various parsing examples for ScheduleDate. 
+ """ + sd = ScheduleDate(schedule) + for d in dates: + assert (d[0] in sd) == d[1] diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py new file mode 100644 index 0000000..d9535da --- /dev/null +++ b/tests/test_06_backup-tool.py @@ -0,0 +1,1507 @@ +"""Test the backup-tool. +""" + +import datetime +import itertools +import os +from pathlib import Path +import pwd +import shutil +import socket +import string +import sys +from archive import Archive +from archive.index import IndexItem, ArchiveIndex +from archive.bt import backup_tool +import pytest +from _pytest.monkeypatch import MonkeyPatch +from conftest import * + + +class BTTestEnv: + """Helper class to manage the environment to test backup-tool. + """ + + def __init__(self, root): + self.root = root + self.root.mkdir() + self.monkeypatch = MonkeyPatch() + self._datetime = FrozenDateTime + self._date = FrozenDate + self._gethostname = MockFunction() + pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') + self._getpwnam = MockFunction(pwd.struct_passwd(pwt)) + self.test_data = dict() + self.test_data_tags = dict() + self.index = ArchiveIndex() + self.backupdir = None + self.tmptarget = None + self.schedules = None + + def config(self, backupdir, tmptarget, schedules=('full', 'cumu', 'incr')): + self.backupdir = self.root / backupdir + self.tmptarget = self.root / tmptarget + self.schedules = schedules + + def __enter__(self): + self.monkeypatch.setattr(datetime, "datetime", self._datetime) + self.monkeypatch.setattr(datetime, "date", self._date) + self.monkeypatch.setattr(socket, "gethostname", self._gethostname) + self.monkeypatch.setattr(pwd, "getpwnam", self._getpwnam) + return self + + def __exit__(self, type, value, tb): + self.monkeypatch.undo() + + def set_datetime(self, dt): + self._datetime.freeze(dt) + + def set_hostname(self, name): + self._gethostname.set_return_value(name) + + def add_test_data(self, tags, items): + for i in items: + self.test_data[i.path] = i 
+ for t in tags: + for s in self.schedules: + k = (t,s) + self.test_data_tags.setdefault(k, set()) + self.test_data_tags[k].add(i.path) + + def remove_test_data(self, tags, items): + for i in items: + del self.test_data[i.path] + for t in tags: + for s in self.schedules: + k = (t,s) + self.test_data_tags.setdefault(k, set()) + self.test_data_tags[k].discard(i.path) + + def flush_test_data(self, tags, schedule): + idx = self.schedules.index(schedule) + for t in tags: + for s in self.schedules[idx:]: + self.test_data_tags[t,s] = set() + + def setup_test_data(self): + setup_testdata(self.root, self.test_data.values()) + + def move_archive(self, name): + (self.tmptarget / name).rename(self.backupdir / name) + + def check_archive(self, name, tag, schedule): + path = self.backupdir / name + items = [ self.test_data[p] for p in self.test_data_tags[tag,schedule] ] + with Archive().open(path) as archive: + check_manifest(archive.manifest, items, prefix_dir=self.root) + + def check_index(self): + idx_file = self.backupdir / ".index.yaml" + backupdir_content = { idx_file } + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + assert len(idx) == len(self.index) + for i1, i0 in zip(idx, self.index): + assert i1.as_dict() == i0.as_dict() + backupdir_content.add(i0.path) + assert set(self.backupdir.iterdir()) == backupdir_content + assert set(self.tmptarget.iterdir()) == set() + + def add_index(self, name, host, schedule, + type=None, policy=None, user=None): + if user: + policy = 'user' + idx_data = { + 'date': datetime.datetime.now().isoformat(sep=' '), + 'path': self.backupdir / name, + 'host': host, + 'policy': policy, + 'user': user, + 'schedule': schedule, + 'type': type or schedule, + } + self.index.append(IndexItem(idx_data)) + + def run_backup_tool(self, argv): + self.monkeypatch.setattr(sys, "argv", argv.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + assert excinfo.value.code == 0 + +@pytest.fixture(scope="class") +def env(tmpdir, request): 
+ with BTTestEnv(tmpdir / request.cls.__name__) as e: + yield e + +class TestBackupTool: + """Test scenario: consider a directory having the following structure:: + + testdir + +-- etc + +-- home + | +-- jdoe + +-- net + | +-- backup + +-- root + +-- usr + | +-- local + +-- var + +-- backup + + Backups are created at different points in time and different + policies, see the cfg file for details: + + + host=desk, policy=sys + schedule: monthly full, weekly incr + + + host=serv, policy=sys + schedule: monthly full, weekly incr + + + host=serv, policy=user, user=jdoe + schedule: monthly full, weekly cumu, daily incr + + Tests: + + + test_initial_full: full backup of initial test data. + 2021-10-03: host=desk, policy=sys, schedule=full + 2021-10-04: host=serv, policy=sys, schedule=full + 2021-10-04: host=serv, policy=user, user=jdoe, schedule=full + + + test_simple_incr: add a few files, both in sys and in + user directories. According to schedule, only incremental user + backup will be made. + 2021-10-06: host=serv, policy=user, user=jdoe, schedule=incr + + + test_noop_incr: add only files in directories that being + excluded. Since there is nothing to backup, no backup should be + created at all. + 2021-10-07: - + + + test_content_incr: modify a file's content, but make sure + all filesystem metadata remain unchanged. + 2021-10-08: host=serv, policy=user, user=jdoe, schedule=incr + + + test_meta_incr: modify a file's metadata, but keep the + content unchanged. + 2021-10-09: host=serv, policy=user, user=jdoe, schedule=incr + + + test_simple_cumu: add some more files, both in sys and in + user directories. According to schedule, a cumulative backup + for user and incremental backups for sys are made. + 2021-10-10: host=desk, policy=sys, schedule=incr + 2021-10-11: host=serv, policy=sys, schedule=incr + 2021-10-11: host=serv, policy=user, user=jdoe, schedule=cumu + + + test_incr: add another files in a user directory. 
+ 2021-10-13: host=serv, policy=user, user=jdoe, schedule=incr + + + test_del_incr: delete the file created for the last test + again. Only the parent directory will be added to the + incremental backup for it has a changed file modification time, + but not its content. + 2021-10-15: host=serv, policy=user, user=jdoe, schedule=incr + + + test_cumu: nothing has changed in sys directories, no + backups will be created for sys. The cumulative backup for user + will essentially have the same content as the last one. + 2021-10-17: - + 2021-10-18: - + 2021-10-18: host=serv, policy=user, user=jdoe, schedule=cumu + + + test_full: the next regular full backup. + 2021-11-07: host=desk, policy=sys, schedule=full + 2021-11-08: host=serv, policy=sys, schedule=full + 2021-11-08: host=serv, policy=user, user=jdoe, schedule=full + + """ + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. + +[DEFAULT] +backupdir = $root/net/backup + +[serv] + +[desk] +targetdir = $root/var/backup + +[sys] +dirs = + $root/etc + $root/root +excludes = + $root/root/.cache +schedules = full/incr + +[desk/sys] +schedule.full.date = Sun *-*-1..7 +schedule.incr.date = Sun * + +[serv/sys] +dirs = + $root/etc + $root/root + $root/usr/local +excludes = + $root/root/.cache +schedule.full.date = Mon *-*-2..8 +schedule.incr.date = Mon * + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = full/cumu/incr +schedule.full.date = Mon *-*-2..8 +schedule.cumu.date = Mon * +schedule.incr.date = * +""" + + def init_data(self, env): + env.config("net/backup", "var/backup") + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(cfg_path, cfg_data, 0o644, 
mtime=1632596683), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), + ] + env.add_test_data(('desk','serv'), sys_data) + sys_serv_data = [ + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), + ] + env.add_test_data(('serv',), sys_serv_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd.dat"), + 0o600, size=7964, mtime=1626052455), + DataFile(Path("home", "jdoe", "rnd2.dat"), 0o640, mtime=1633050855), + DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), + ] + env.add_test_data(('user',), user_data) + excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.dependency() + def test_initial_full(self, env): + """Full backup of initial test data. 
+ """ + self.init_data(env) + + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "desk-211003-full.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'full') + env.add_index(archive_name, 'desk', 'full', policy='sys') + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211004-full.tar.bz2" + env.check_archive(archive_name, 'serv', 'full') + env.add_index(archive_name, 'serv', 'full', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211004-full.tar.bz2" + env.check_archive(archive_name, 'user', 'full') + env.add_index(archive_name, 'serv', 'full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'cumu') + + @pytest.mark.dependency(depends=["test_initial_full"], scope='class') + def test_simple_incr(self, env): + """Add a few files, both in sys and in user directories. + According to schedule, only incremental user backup will be + made. 
+ """ + mtime = 1633451717 + u_path = Path("home", "jdoe", "misc") + u_dir = DataDir(u_path, 0o755, mtime=mtime) + u_file = DataRandomFile(u_path / "rnd7.dat", + 0o644, size=473, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1633464305 + s_path = Path("root", "rnd8.dat") + s_file = DataRandomFile(s_path, 0o600, size=42, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_dir, u_file]) + env.add_test_data(('desk','serv'), [s_parent, s_file]) + setup_testdata(env.root, [u_dir, u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211006-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_simple_incr"], scope='class') + def test_noop_incr(self, env): + """Add only files in directories that being excluded. + Since there is nothing to backup, no backup should be created at all. 
+ """ + mtime = 1633487220 + s_path = Path("root", ".cache", "rnd10.dat") + s_file = DataRandomFile(s_path, 0o600, size=27, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + mtime = 1633500600 + u_path = Path("home", "jdoe", "tmp", "rnd9.dat") + u_file = DataRandomFile(u_path, 0o640, size=582, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('excl',), [s_parent, s_file, u_parent, u_file]) + setup_testdata(env.root, [s_file, u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_noop_incr"], scope='class') + def test_content_incr(self, env): + """Modify a file's content, but make sure all filesystem metadata + remain unchanged. 
+ """ + u_path = Path("home", "jdoe", "rnd2.dat") + u_orig_file = env.test_data[u_path] + with gettestdata("rnd2bis.dat").open("rb") as f: + u_file = DataContentFile(u_path, f.read(), + mode=u_orig_file.mode, + mtime=u_orig_file.mtime) + u_parent = env.test_data[u_path.parent] + env.add_test_data(('user',), [u_file]) + setup_testdata(env.root, [u_parent, u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211008-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_content_incr"], scope='class') + def test_meta_incr(self, env): + """Modify a file's metadata, but keep the content unchanged. 
+ """ + u_path = Path("home", "jdoe", "rnd3.dat") + u_file = env.test_data[u_path] + u_parent = env.test_data[u_path.parent] + u_file.mode = 0o644 + env.add_test_data(('user',), [u_file]) + (env.root / u_path).chmod(u_file.mode) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 9, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211009-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_meta_incr"], scope='class') + def test_simple_cumu(self, env): + """Add some more files, both in sys and in user directories. + According to schedule, a cumulative backup for user and + incremental backups for sys are made. 
+ """ + mtime = 1633837020 + s0_path = Path("usr", "local", "rnd11.dat") + s0_file = DataRandomFile(s0_path, 0o644, size=528, mtime=mtime) + s0_parent = env.test_data[s0_path.parent] + s0_parent.mtime = mtime + mtime = 1633843260 + s1_path = Path("root", "rnd12.dat") + s1_file = DataRandomFile(s1_path, 0o600, size=17, mtime=mtime) + s1_parent = env.test_data[s1_path.parent] + s1_parent.mtime = mtime + mtime = 1633876920 + u_path = Path("home", "jdoe", "misc", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o644, size=378, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('serv',), [s0_parent, s0_file]) + env.add_test_data(('desk','serv'), [s1_parent, s1_file]) + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [s0_file, s1_file, u_file]) + + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 10, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "desk-211010-incr.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'incr') + env.add_index(archive_name, 'desk', 'incr', policy='sys') + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211011-incr.tar.bz2" + env.check_archive(archive_name, 'serv', 'incr') + env.add_index(archive_name, 'serv', 'incr', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211011-cumu.tar.bz2" + env.check_archive(archive_name, 'user', 'cumu') + env.add_index(archive_name, 'serv', 'cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'incr') + + @pytest.mark.dependency(depends=["test_simple_cumu"], scope='class') + def test_incr(self, env): + """Add 
another files in a user directory. + """ + mtime = 1634067525 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o644, size=146, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211013-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_incr"], scope='class') + def test_del_incr(self, env): + """Delete the file created for the last test again. + Only the parent directory will be added to the incremental + backup for it has a changed file modification time, but not + its content. 
+ """ + mtime = 1634240325 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = env.test_data[u_path] + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.remove_test_data(('user',), [u_file]) + env.add_test_data(('user',), [u_parent]) + u_file.unlink(env.root, mtime) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 15, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 15, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211015-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_del_incr"], scope='class') + def test_cumu(self, env): + """Do the next weekly backup. + Nothing has changed in sys directories, no backups will be + created for sys. The cumulative backup for user will + essentially have the same content as the last one. + """ + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 17, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211018-cumu.tar.bz2" + env.check_archive(archive_name, 'user', 'cumu') + env.add_index(archive_name, 'serv', 'cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'incr') + + @pytest.mark.dependency(depends=["test_cumu"], scope='class') + def test_full(self, env): + """Do the next monthly backup. 
+ """ + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 11, 7, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "desk-211107-full.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'full') + env.add_index(archive_name, 'desk', 'full', policy='sys') + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211108-full.tar.bz2" + env.check_archive(archive_name, 'serv', 'full') + env.add_index(archive_name, 'serv', 'full', policy='sys') + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211108-full.tar.bz2" + env.check_archive(archive_name, 'user', 'full') + env.add_index(archive_name, 'serv', 'full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'cumu') + + +class TestBackupToolNamedSchedule: + """Use named schedules in the config file. + + Otherwise this is mostly a simplified version of class + TestBackupTool. The focus of the tests is on proper functioning + of the schedule, full vs. cumulative vs. incremental. + """ + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. 
+ +[DEFAULT] +backupdir = $root/net/backup + +[serv] + +[desk] +targetdir = $root/var/backup + +[sys] +dirs = + $root/etc + $root/root +excludes = + $root/root/.cache +schedules = monthly:full/weekly:incr + +[desk/sys] +schedule.monthly.date = Sun *-*-1..7 +schedule.weekly.date = Sun * + +[serv/sys] +dirs = + $root/etc + $root/root + $root/usr/local +excludes = + $root/root/.cache +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = monthly:full/weekly:cumu/daily:incr +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * +schedule.daily.date = * +""" + + def init_data(self, env): + env.config("net/backup", "var/backup", + schedules=('monthly', 'weekly', 'daily')) + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), + ] + env.add_test_data(('sys',), sys_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd.dat"), + 0o600, size=7964, mtime=1626052455), + DataFile(Path("home", "jdoe", "rnd2.dat"), 0o640, mtime=1633050855), + DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), + ] + env.add_test_data(('user',), 
user_data) + excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.dependency() + def test_initial_monthly(self, env): + """Full backup of initial test data. + """ + self.init_data(env) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211004-monthly.tar.bz2" + env.check_archive(archive_name, 'sys', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='full', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211004-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys', 'user'), 'weekly') + + @pytest.mark.dependency(depends=["test_initial_monthly"], scope='class') + def test_first_daily(self, env): + """First incremental backup in the first week. 
+ """ + mtime = 1633451717 + u_path = Path("home", "jdoe", "misc") + u_dir = DataDir(u_path, 0o755, mtime=mtime) + u_file = DataRandomFile(u_path / "rnd7.dat", + 0o644, size=473, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1633464305 + s_path = Path("root", "rnd8.dat") + s_file = DataRandomFile(s_path, 0o600, size=42, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_dir, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_dir, u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211006-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_first_daily"], scope='class') + def test_second_daily(self, env): + """Second incremental backup in the first week. 
+ """ + mtime = 1633500600 + u_path = Path("home", "jdoe", "misc", "rnd9.dat") + u_file = DataRandomFile(u_path, 0o640, size=582, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211007-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_second_daily"], scope='class') + def test_first_weekly(self, env): + """First cumulative backup. 
+ """ + mtime = 1633837020 + s0_path = Path("usr", "local", "rnd11.dat") + s0_file = DataRandomFile(s0_path, 0o644, size=528, mtime=mtime) + s0_parent = env.test_data[s0_path.parent] + s0_parent.mtime = mtime + mtime = 1633843260 + s1_path = Path("root", "rnd12.dat") + s1_file = DataRandomFile(s1_path, 0o600, size=17, mtime=mtime) + s1_parent = env.test_data[s1_path.parent] + s1_parent.mtime = mtime + mtime = 1633876920 + u_path = Path("home", "jdoe", "misc", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o644, size=378, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('sys',), [s0_parent, s0_file, s1_parent, s1_file]) + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [s0_file, s1_file, u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211011-weekly.tar.bz2" + env.check_archive(archive_name, 'sys', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='incr', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211011-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys',), 'weekly') + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_first_weekly"], scope='class') + def test_third_daily(self, env): + """First incremental backup in the second week. 
+ """ + mtime = 1634053507 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o644, size=763, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1634083500 + s_path = Path("root", "rnd15.dat") + s_file = DataRandomFile(s_path, 0o600, size=165, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211013-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_third_daily"], scope='class') + def test_second_weekly(self, env): + """Second cumulative backup. 
+ """ + mtime = 1634509129 + u_path = Path("home", "jdoe", "misc", "rnd16.dat") + u_file = DataRandomFile(u_path, 0o644, size=834, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211018-weekly.tar.bz2" + env.check_archive(archive_name, 'sys', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='incr', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211018-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys',), 'weekly') + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_second_weekly"], scope='class') + def test_fourth_daily(self, env): + """First incremental backup in the third week. 
+ """ + mtime = 1634605839 + s_path = Path("root", "rnd18.dat") + s_file = DataRandomFile(s_path, 0o600, size=589, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + mtime = 1634631969 + u_path = Path("home", "jdoe", "misc", "rnd17.dat") + u_file = DataRandomFile(u_path, 0o644, size=568, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 20, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 20, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211020-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_fourth_daily"], scope='class') + def test_second_monthly(self, env): + """Do the next monthly backup. 
+ """ + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211108-monthly.tar.bz2" + env.check_archive(archive_name, 'sys', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='full', policy='sys') + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211108-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys', 'user'), 'weekly') + + +class TestBackupToolMixedScheduleTypes: + """The schedule types may be freely mixed. + + The backup-tool supports a hierarchy of the schedule types 'full', + 'cumu', and 'incr'. It is not required to use them in that order. + Only the root of the hierarchy must have the type 'full', + otherwise the types may be freely mixed. + + The scenario considered in this test: + - quarterly: full, + - monthly: incr, + - weekly: cumu, + - daily: incr. + """ + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. 
+ +[serv] +backupdir = $root/net/backup + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = quarterly:full/monthly:incr/weekly:cumu/daily:incr +schedule.quarterly.date = Mon *-1,4,7,10-2..8 +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * +schedule.daily.date = * +""" + + def init_data(self, env): + env.config("net/backup", "var", + schedules=('quarterly', 'monthly', 'weekly', 'daily')) + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1625363657), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1625243298), + ] + env.add_test_data(('sys',), sys_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd00.dat"), + 0o600, size=7964, mtime=1612908655), + DataRandomFile(Path("home", "jdoe", "rnd01.dat"), + 0o600, size=39, mtime=1614947739), + ] + env.add_test_data(('user',), user_data) + excl_data = [ + DataDir(Path("net", "backup"), 0o755, mtime=1625360400), + DataDir(Path("var"), 0o755, mtime=1625360400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.dependency() + def test_20210705(self, env): + """Full backup of initial test data. 
+ """ + self.init_data(env) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 5, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210705-quarterly.tar.bz2" + env.check_archive(archive_name, 'user', 'quarterly') + env.add_index(archive_name, 'serv', 'quarterly', + type='full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210705"], scope='class') + def test_20210707(self, env): + """Daily incremental backup in the first week. + """ + mtime = 1625562697 + u_path = Path("home", "jdoe", "rnd02.dat") + u_file = DataRandomFile(u_path, 0o600, size=446, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210707-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210707"], scope='class') + def test_20210709(self, env): + """Second daily incremental backup in the first week. 
+ """ + mtime = 1625743947 + u_path = Path("home", "jdoe", "rnd03.dat") + u_file = DataRandomFile(u_path, 0o600, size=55, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210709-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210709"], scope='class') + def test_20210712(self, env): + """Weekly cumulative backup. + """ + mtime = 1626043402 + u_path = Path("home", "jdoe", "rnd04.dat") + u_file = DataRandomFile(u_path, 0o600, size=228, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 12, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210712-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210712"], scope='class') + def test_20210714(self, env): + """Daily incremental backup in the second week. 
+ """ + mtime = 1626167376 + u_path = Path("home", "jdoe", "rnd05.dat") + u_file = DataRandomFile(u_path, 0o600, size=263, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 14, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210714-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210714"], scope='class') + def test_20210719(self, env): + """Weekly cumulative backup. + """ + mtime = 1626575481 + u_path = Path("home", "jdoe", "rnd06.dat") + u_file = DataRandomFile(u_path, 0o600, size=287, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 19, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210719-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210719"], scope='class') + def test_20210721(self, env): + """Daily incremental backup in the third week. 
+ """ + mtime = 1626826403 + u_path = Path("home", "jdoe", "rnd07.dat") + u_file = DataRandomFile(u_path, 0o600, size=318, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 21, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210721-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210721"], scope='class') + def test_20210802(self, env): + """Monthly backup. + """ + mtime = 1627806186 + u_path = Path("home", "jdoe", "rnd08.dat") + u_file = DataRandomFile(u_path, 0o600, size=334, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 2, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210802-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210802"], scope='class') + def test_20210804(self, env): + """Daily incremental backup. 
+ """ + mtime = 1628026098 + u_path = Path("home", "jdoe", "rnd09.dat") + u_file = DataRandomFile(u_path, 0o600, size=404, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210804-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210804"], scope='class') + def test_20210809(self, env): + """Weekly cumulative backup. + """ + mtime = 1628460869 + u_path = Path("home", "jdoe", "rnd10.dat") + u_file = DataRandomFile(u_path, 0o600, size=453, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210809-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210809"], scope='class') + def test_20210811(self, env): + """Daily incremental backup. 
+ """ + mtime = 1628563138 + u_path = Path("home", "jdoe", "rnd11.dat") + u_file = DataRandomFile(u_path, 0o600, size=174, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 11, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210811-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210811"], scope='class') + def test_20210906(self, env): + """Monthly backup. + """ + mtime = 1630827561 + u_path = Path("home", "jdoe", "rnd12.dat") + u_file = DataRandomFile(u_path, 0o600, size=225, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 6, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210906-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210906"], scope='class') + def test_20210908(self, env): + """Daily incremental backup. 
+ """ + mtime = 1630986960 + u_path = Path("home", "jdoe", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o600, size=317, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210908-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210908"], scope='class') + def test_20210913(self, env): + """Weekly cumulative backup. + """ + mtime = 1631419436 + u_path = Path("home", "jdoe", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o600, size=159, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 13, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210913-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210913"], scope='class') + def test_20210915(self, env): + """Daily incremental backup. 
+ """ + mtime = 1631652957 + u_path = Path("home", "jdoe", "rnd15.dat") + u_file = DataRandomFile(u_path, 0o600, size=199, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 15, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210915-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210915"], scope='class') + def test_20210917(self, env): + """Daily incremental backup. + """ + mtime = 1631781786 + u_path = Path("home", "jdoe", "rnd16.dat") + u_file = DataRandomFile(u_path, 0o600, size=24, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 17, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210917-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210917"], scope='class') + def test_20211004(self, env): + """Quarterly full backup. 
+ """
+ mtime = 1633264335
+ u_path = Path("home", "jdoe", "rnd17.dat")
+ u_file = DataRandomFile(u_path, 0o600, size=467, mtime=mtime)
+ u_parent = env.test_data[u_path.parent]
+ u_parent.mtime = mtime
+ env.add_test_data(('user',), [u_parent, u_file])
+ setup_testdata(env.root, [u_file])
+
+ env.set_hostname("serv")
+ env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10))
+ env.run_backup_tool("backup-tool --verbose create --user jdoe")
+ archive_name = "jdoe-211004-quarterly.tar.bz2"
+ env.check_archive(archive_name, 'user', 'quarterly')
+ env.add_index(archive_name, 'serv', 'quarterly',
+ type='full', user='jdoe')
+
+ env.run_backup_tool("backup-tool --verbose index")
+ env.check_index()
+ env.flush_test_data(('user',), 'monthly')
+
+
+class TestBackupToolDedup:
+ """Test the dedup configuration option.
+ """
+
+ src_dir = Path("root")
+ src_path = Path("root", "rnd.dat")
+ lnk_path = Path("root", "rnd_lnk.dat")
+ cp_path = Path("root", "rnd_cp.dat")
+
+ cfg = """# Configuration file for backup-tool.
+# All paths are within a root directory that needs to be substituted.
+ +[sys] +name = %(host)s-%(date)s-%(schedule)s-$suffix.tar.bz2 +dirs = + $root/root +backupdir = $root/net/backup +schedules = full/incr +schedule.full.date = Mon *-*-2..8 +schedule.incr.date = Mon * +""" + + def init_data(self, env, dedup): + env.config("net/backup", "var/backup") + subst = dict(root=env.root, suffix=str(dedup)) + cfg = string.Template(self.cfg).substitute(subst) + if dedup: + cfg_path = env.root / "etc" / ("backup-%s.cfg" % dedup) + cfg += "dedup = %s\n" % dedup + else: + cfg_path = env.root / "etc" / "backup.cfg" + cfg_path.parent.mkdir(parents=True, exist_ok=True) + with cfg_path.open("wt") as f: + f.write(cfg) + if not (env.root / self.src_dir).is_dir(): + sys_data = [ + DataDir(self.src_dir, 0o700, mtime=1633274230), + DataFile(self.src_path, 0o600, mtime=1633243020), + ] + env.add_test_data(('sys',), sys_data) + excl_data = [ + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + src_file = env.test_data[self.src_path] + os.link(env.root / self.src_path, env.root / self.lnk_path) + shutil.copy2(env.root / self.src_path, env.root / self.cp_path) + extra_data = [ + DataFile(self.lnk_path, src_file.mode, + mtime=src_file.mtime, checksum=src_file.checksum), + DataFile(self.cp_path, src_file.mode, + mtime=src_file.mtime, checksum=src_file.checksum), + ] + env.add_test_data(('sys',), extra_data) + env.test_data[self.src_dir].create(env.root) + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.parametrize("dedup", [None, 'never', 'link', 'content']) + def test_full(self, env, dedup): + """Full backup of initial test data. 
+ """ + self.init_data(env, dedup) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211004-full-%s.tar.bz2" % str(dedup) + env.check_archive(archive_name, 'sys', 'full') + with Archive().open(env.backupdir / archive_name) as archive: + src_path = archive._arcname(env.root / self.src_path) + lnk_path = archive._arcname(env.root / self.lnk_path) + cp_path = archive._arcname(env.root / self.cp_path) + ti_lnk = archive._file.getmember(lnk_path) + ti_cp = archive._file.getmember(cp_path) + if dedup == 'never': + assert ti_lnk.isfile() + assert ti_cp.isfile() + elif dedup is None or dedup == 'link': + assert ti_lnk.islnk() + assert ti_lnk.linkname == src_path + assert ti_cp.isfile() + elif dedup == 'content': + assert ti_lnk.islnk() + assert ti_lnk.linkname == src_path + assert ti_cp.islnk() + assert ti_cp.linkname == src_path diff --git a/tests/test_05_mailarchive_create.py b/tests/test_09_mailarchive_create.py similarity index 100% rename from tests/test_05_mailarchive_create.py rename to tests/test_09_mailarchive_create.py diff --git a/tests/test_05_mailarchive_legacy.py b/tests/test_09_mailarchive_legacy.py similarity index 100% rename from tests/test_05_mailarchive_legacy.py rename to tests/test_09_mailarchive_legacy.py