def ready(self, *args, **kwargs):
    """Hook invoked once the Django app registry is populated.

    Schedules the recurring ``BackupRunner`` system job, but only inside an
    ``rqworker`` process so web/WSGI workers never enqueue backup jobs.
    """
    super().ready()
    import sys

    # BUG FIX: the original tested ``'rqworker' in sys.argv[1]`` which
    # (a) raises IndexError when len(sys.argv) == 1 and (b) is a substring
    # match on a single argument. Membership in the full argv list is the
    # intended check.
    if 'rqworker' in sys.argv:
        from netbox import settings
        from netbox_config_backup.jobs.backup import BackupRunner

        frequency = settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency')
        # BUG FIX: a missing 'frequency' setting previously raised
        # ``TypeError`` on ``None / 60``; skip scheduling instead.
        if frequency is not None:
            BackupRunner.enqueue_once(interval=frequency / 60)
logger = logging.getLogger("netbox_config_backup")


def remove_stale_backupjobs(job: BackupJob):
    """Prune superseded BackupJob rows. Not yet implemented (intentional no-op)."""
    pass


def run_backup(job_id):
    """Execute one device backup inside a forked worker process.

    Loads the ``BackupJob`` row, connects to the device with NAPALM,
    stores the retrieved configuration via ``Backup.set_config`` and
    records progress/errors on the job row.

    :param job_id: primary key of the ``BackupJob`` to run.
    """
    close_db()  # forked process: drop DB connections inherited from the parent
    logger.info(f'Starting backup for job {job_id}')
    try:
        job = BackupJob.objects.get(pk=job_id)
    except Exception as e:
        logger.error(f'Unable to load job {job_id}: {e}')
        logger.debug(f'\t{traceback.format_exc()}')
        raise e

    try:
        backup = Backup.objects.get(pk=job.backup.pk)
        backup.refresh_from_db()

        job.status = JobStatusChoices.STATUS_PENDING
        job.pid = os.getpid()
        job.save()

        if not can_backup(backup):
            job.status = JobStatusChoices.STATUS_FAILED
            if not job.data:
                job.data = {}
            job.data.update({'error': f'Cannot backup {backup}'})
            job.full_clean()
            job.save()
            logger.warning(f'Cannot backup {backup}')
            return

        try:
            ip = backup.ip if backup.ip is not None else backup.device.primary_ip
        except Exception as e:
            logger.debug(f'{e}: {backup}')
            raise e

        # Guard clause: no reachable address means the job cannot proceed.
        if not ip:
            job.status = JobStatusChoices.STATUS_FAILED
            if not job.data:
                job.data = {}
            job.data.update({'error': f'{backup}: No IP set'})
            job.full_clean()
            job.save()
            logger.debug(f'{backup}: No IP set')
            return

        try:
            d = napalm_init(backup.device, ip)
        except (TimeoutError, ServiceUnavailable):
            job.status = JobStatusChoices.STATUS_FAILED
            job.data = {'error': f'Timeout Connecting to {backup.device} with ip {ip}'}
            # BUG FIX: the original *assigned* the message to the method
            # (``logger.debug = f'...'``), clobbering it, instead of calling it.
            logger.debug(f'Timeout Connecting to {backup.device} with ip {ip}')
            job.save()
            return

        job.status = JobStatusChoices.STATUS_RUNNING
        job.started = timezone.now()
        job.save()

        try:
            status = check_config_save_status(d)
            # The original four-branch if/elif chain saved exactly when the
            # stored value was None or its truthiness differed from ``status``.
            if status is not None and (
                backup.config_status is None
                or bool(status) != bool(backup.config_status)
            ):
                backup.config_status = status
                backup.save()
        except Exception as e:
            logger.error(f'{backup}: had error setting backup status: {e}')

        configs = d.get_config()
        backup.set_config(configs)
        d.close()

        logger.info(f'{backup}: Backup complete')
        job.status = JobStatusChoices.STATUS_COMPLETED
        job.completed = timezone.now()
        job.save()
        remove_stale_backupjobs(job=job)
    except Exception as e:
        logger.error(f'Exception in {job_id}: {e}')
        logger.info(f'\t{traceback.format_exc()}')
        if job:
            job.status = JobStatusChoices.STATUS_ERRORED
            if not job.data:
                job.data = {}
            job.data.update({'error': f'{e}'})
            job.full_clean()
            job.save()
class SchedulerRunner(JobRunner):
    """System-job placeholder for the scheduler (no behavior yet)."""

    class Meta:
        name = "The scheduler"


class BackupRunner(JobRunner):
    """System job that schedules, forks and supervises device backups.

    ``run`` cleans up stale jobs, schedules fresh ones, forks one worker
    process per scheduled job and then polls until all workers exit.
    """

    # Maps backup pk -> {'process', 'backup', 'job'} for forked workers.
    processes = {}

    class Meta:
        name = 'The Backup Job Runner'

    def clean_stale_jobs(self):
        """Mark hung jobs errored and de-duplicate scheduled jobs per backup."""
        # BUG FIX: ``status=`` was handed the *list*
        # ``ENQUEUED_STATE_CHOICES``; a membership lookup needs ``status__in=``.
        # BUG FIX: BackupJob has no 'device' relation (it has 'backup'), so
        # prefetch the device through the backup FK.
        jobs = BackupJob.objects.order_by('created').filter(
            status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES,
        ).prefetch_related('backup__device')
        scheduled = jobs.filter(status=JobStatusChoices.STATUS_SCHEDULED)
        stale = jobs.filter(scheduled__lt=timezone.now() - timedelta(minutes=30))

        for job in stale:
            job.status = JobStatusChoices.STATUS_ERRORED
            if not job.data:
                job.data = {}
            job.data.update({'error': 'Job hung'})
            job.save()
            job.refresh_from_db()
            # Message fixed: the job is marked errored, nothing is deleted.
            logger.warning(f'Job {job.backup} appears stuck, marking errored')

        for job in scheduled:
            # Keep only the most recently scheduled job per backup.
            if job != scheduled.filter(backup=job.backup).last():
                job.status = JobStatusChoices.STATUS_FAILED
                if not job.data:
                    job.data = {}
                job.data.update({'error': 'Process terminated'})
                job.save()

    def schedule_jobs(self):
        """Create or refresh a scheduled BackupJob for every active backup."""
        backups = Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False)
        for backup in backups:
            jobs = BackupJob.objects.filter(
                backup=backup, status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES
            )
            if can_backup(backup):
                logger.debug(f'Queuing device {backup.device} for backup')
                job = jobs.last()
                if job is not None:
                    job.runner = self.job
                    job.status = JobStatusChoices.STATUS_SCHEDULED
                    job.scheduled = timezone.now()
                    job.save()
                else:
                    job = BackupJob(
                        runner=self.job,
                        backup=backup,
                        status=JobStatusChoices.STATUS_SCHEDULED,
                        scheduled=timezone.now(),
                        job_id=uuid.uuid4(),
                        data={},
                    )
                    job.full_clean()
                    job.save()
            else:
                for job in jobs:
                    job.status = JobStatusChoices.STATUS_FAILED
                    if not job.data:
                        job.data = {}
                    job.data.update({'error': 'Cannot queue job'})
                    job.save()

    def run_processes(self):
        """Fork a worker for every scheduled job; record failures on the row."""
        for job in BackupJob.objects.filter(status=JobStatusChoices.STATUS_SCHEDULED):
            try:
                process = self.fork_process(job)
                process.join(1)
            except Exception as e:
                job.status = JobStatusChoices.STATUS_FAILED
                # BUG FIX: ``job.data`` may be None (nullable field); guard
                # before subscripting.
                if not job.data:
                    job.data = {}
                job.data['error'] = str(e)
                job.save()

    def fork_process(self, job):
        """Fork a child process running ``run_backup`` for *job* and track it."""
        close_db()  # connections must not be shared across fork()
        process = Process(target=run_backup, args=(job.pk, ), )
        self.processes[job.backup.pk] = {
            'process': process,
            'backup': job.backup.pk,
            'job': job.pk,
        }
        process.start()
        logger.debug(f'Forking process {process.pid} for {job.backup} backup')
        return process

    def handle_processes(self):
        """Reap finished workers; mark any non-completed job as errored."""
        close_db()
        for pk in list(self.processes.keys()):
            entry = self.processes.get(pk, {})
            process = entry.get('process')
            job_pk = entry.get('job')
            backup = entry.get('backup')
            if not process.is_alive():
                logger.debug(f'Terminating process {process.pid} with job pk of {pk} for {backup}')
                process.terminate()
                del self.processes[pk]
                job = BackupJob.objects.filter(pk=job_pk).first()
                if job and job.status != JobStatusChoices.STATUS_COMPLETED:
                    job.status = JobStatusChoices.STATUS_ERRORED
                    if not job.data:
                        job.data = {}
                    job.data.update({'error': 'Process terminated'})
                    job.save()

    def run(self, *args, **kwargs):
        """Entry point: clean, schedule, fork, then poll until workers finish."""
        try:
            self.clean_stale_jobs()
            self.schedule_jobs()
            self.run_processes()
            while True:
                self.handle_processes()
                if len(self.processes) == 0:
                    return
                time.sleep(1)
        except Exception as e:
            logger.warning(f'{traceback.format_exc()}')
            logger.error(f'{e}')
print(f'Backup: {backup} has been queued') - diff --git a/netbox_config_backup/management/commands/fork.py b/netbox_config_backup/management/commands/fork.py deleted file mode 100644 index cd3881e..0000000 --- a/netbox_config_backup/management/commands/fork.py +++ /dev/null @@ -1,46 +0,0 @@ -import uuid - -from django.core.management.base import BaseCommand -from django.db import transaction -from django.utils import timezone - -from netbox_config_backup.models import BackupJob -from netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued -from netbox_config_backup.utils.rq import can_backup - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument('--time', dest='time', help="time") - parser.add_argument('--device', dest='device', help="Device Name") - - def handle(self, *args, **options): - from multiprocessing import Process - import time - def test(i): - self.stdout.write(f"Child {i} is running") - self.stdout.write(f"Child {i} sleeping 10 seconds") - time.sleep(10) - self.stdout.write(f"Child {i} sleep complete") - - processes = {} - for i in range(1, 3): - p = Process(target=test, args=(i,)) - p.start() - p.join(1) - self.stdout.write(f"Child {i} running") - processes.update({p.pid: p}) - - while True: - if len(processes) == 0: - break - for pid in list(processes.keys()): - process = processes.get(pid, None) - if not process.is_alive(): - del processes[pid] - time.sleep(1) - - - - diff --git a/netbox_config_backup/management/commands/listbackups.py b/netbox_config_backup/management/commands/listbackups.py index 1a954d2..d9f75b9 100644 --- a/netbox_config_backup/management/commands/listbackups.py +++ b/netbox_config_backup/management/commands/listbackups.py @@ -1,13 +1,4 @@ -import uuid - from django.core.management.base import BaseCommand -from django.db import transaction -from django.utils import timezone - -from netbox_config_backup.models import BackupJob -from 
netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued -from netbox_config_backup.utils.rq import can_backup class Command(BaseCommand): diff --git a/netbox_config_backup/management/commands/runbackup.py b/netbox_config_backup/management/commands/runbackup.py index b3a478c..9b846d3 100644 --- a/netbox_config_backup/management/commands/runbackup.py +++ b/netbox_config_backup/management/commands/runbackup.py @@ -5,8 +5,6 @@ from django.utils import timezone from netbox_config_backup.models import BackupJob -from netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued from netbox_config_backup.utils.rq import can_backup @@ -15,18 +13,6 @@ def add_arguments(self, parser): parser.add_argument('--time', dest='time', help="time") parser.add_argument('--device', dest='device', help="Device Name") - def run_backup(self, backup): - if can_backup(backup): - backupjob = backup.jobs.filter(backup__device=backup.device).last() - if backupjob is None: - backupjob = BackupJob.objects.create( - backup=backup, - scheduled=timezone.now(), - uuid=uuid.uuid4() - ) - backup_job(backupjob.pk) - remove_queued(backup) - def handle(self, *args, **options): from netbox_config_backup.models import Backup if options['device']: diff --git a/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py b/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py new file mode 100644 index 0000000..5b5c9e9 --- /dev/null +++ b/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.8 on 2024-09-30 21:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('netbox_config_backup', '0015_backup_comments_backup_description'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='pid', + field=models.BigIntegerField(blank=True, null=True), + ), + ] diff --git 
a/netbox_config_backup/migrations/0017_add_job_to_backupjob.py b/netbox_config_backup/migrations/0017_add_job_to_backupjob.py new file mode 100644 index 0000000..f32e81e --- /dev/null +++ b/netbox_config_backup/migrations/0017_add_job_to_backupjob.py @@ -0,0 +1,26 @@ +# Generated by Django 5.0.8 on 2024-10-01 01:52 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0012_job_object_type_optional'), + ('netbox_config_backup', '0016_add_pid_to_backup_job'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='runner', + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='backup_job', + to='core.job', + ), + ), + ] diff --git a/netbox_config_backup/migrations/0018_move_to_nbmodel.py b/netbox_config_backup/migrations/0018_move_to_nbmodel.py new file mode 100644 index 0000000..29a597a --- /dev/null +++ b/netbox_config_backup/migrations/0018_move_to_nbmodel.py @@ -0,0 +1,42 @@ +# Generated by Django 5.0.8 on 2024-10-01 23:44 + +import taggit.managers +import utilities.json +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('extras', '0121_customfield_related_object_filter'), + ('netbox_config_backup', '0017_add_job_to_backupjob'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='custom_field_data', + field=models.JSONField( + blank=True, default=dict, encoder=utilities.json.CustomFieldJSONEncoder + ), + ), + migrations.AddField( + model_name='backupjob', + name='last_updated', + field=models.DateTimeField(auto_now=True, null=True), + ), + migrations.AddField( + model_name='backupjob', + name='tags', + field=taggit.managers.TaggableManager( + through='extras.TaggedItem', to='extras.Tag' + ), + ), + migrations.AlterField( + model_name='backupjob', + name='id', + field=models.BigAutoField( + 
auto_created=True, primary_key=True, serialize=False + ), + ), + ] diff --git a/netbox_config_backup/models/backups.py b/netbox_config_backup/models/backups.py index 313c760..49ed9c0 100644 --- a/netbox_config_backup/models/backups.py +++ b/netbox_config_backup/models/backups.py @@ -9,13 +9,11 @@ from django_rq import get_queue from dcim.models import Device -from core.choices import JobStatusChoices from netbox.models import PrimaryModel from netbox_config_backup.choices import StatusChoices from netbox_config_backup.helpers import get_repository_dir -from netbox_config_backup.utils.rq import remove_queued from ..querysets import BackupQuerySet from ..utils import Differ @@ -58,27 +56,6 @@ def get_absolute_url(self): def __str__(self): return self.name - def delete(self, *args, **kwargs): - queue = get_queue('netbox_config_backup.jobs') - remove_queued(self) - - super().delete(*args, **kwargs) - - def enqueue_if_needed(self): - from netbox_config_backup.utils.rq import enqueue_if_needed - return enqueue_if_needed(self) - - def requeue(self): - self.jobs.filter( - ~Q(status=JobStatusChoices.STATUS_COMPLETED) & - ~Q(status=JobStatusChoices.STATUS_FAILED) & - ~Q(status=JobStatusChoices.STATUS_ERRORED) - ).update( - status=JobStatusChoices.STATUS_FAILED - ) - remove_queued(self) - self.enqueue_if_needed() - def get_config(self, index='HEAD'): from netbox_config_backup.git import repository running = repository.read(f'{self.uuid}.running') @@ -123,7 +100,7 @@ def set_config(self, configs, files=('running', 'startup'), pk=None): else: #logger.debug(f'[{pk}] Saving commit') bc = BackupCommit(sha=commit, time=time) - logger.info(f'{self}: {commit}:{bc.time}') + logger.debug(f'{self}: {commit}:{bc.time}') bc.save() for change in log.get('changes', []): diff --git a/netbox_config_backup/models/jobs.py b/netbox_config_backup/models/jobs.py index b80ac00..33418bf 100644 --- a/netbox_config_backup/models/jobs.py +++ b/netbox_config_backup/models/jobs.py @@ -3,14 +3,27 @@ from 
django.db import models from django.db.models import ForeignKey from django.utils import timezone +from django.utils.translation import gettext as _ + from django_rq import get_queue + from core.choices import JobStatusChoices +from netbox.models import NetBoxModel +from utilities.querysets import RestrictedQuerySet from .abstract import BigIDModel logger = logging.getLogger(f"netbox_config_backup") -class BackupJob(BigIDModel): +class BackupJob(NetBoxModel): + runner = models.ForeignKey( + verbose_name=_('Job Run'), + to='core.Job', + on_delete=models.SET_NULL, + related_name='backup_job', + null=True, + blank=True + ) backup = ForeignKey( to='Backup', on_delete=models.CASCADE, @@ -18,6 +31,11 @@ class BackupJob(BigIDModel): null=False, related_name='jobs', ) + pid = models.BigIntegerField( + verbose_name=_('PID'), + null=True, + blank=True, + ) created = models.DateTimeField( auto_now_add=True ) @@ -46,19 +64,11 @@ class BackupJob(BigIDModel): unique=True ) + objects = RestrictedQuerySet.as_manager() + def __str__(self): return str(self.job_id) - def delete(self, using=None, keep_parents=False): - queue = get_queue('netbox_config_backup.jobs') - - job = queue.fetch_job(f'{self.job_id}') - if job is not None: - job.cancel() - job.remove() - - super().delete(using=using, keep_parents=keep_parents) - @property def queue(self): return get_queue('netbox_config_backup.jobs') @@ -80,50 +90,4 @@ def set_status(self, status): """ self.status = status if status in JobStatusChoices.TERMINAL_STATE_CHOICES: - self.completed = timezone.now() - - def reschedule(self, time): - """ - Reschedule a job - """ - if self.status == JobStatusChoices.STATUS_PENDING: - self.scheduled = time - job = self.queue.fetch_job(f'{self.job_id}') - self.queue.schedule(job, time) - else: - raise Exception('Job is not in a state for rescheduling') - - @classmethod - def enqueue(cls, backup, delay=None): - from netbox_config_backup.utils import enqueue - return enqueue(backup, delay) - - @classmethod 
- def enqueue_if_needed(cls, backup, delay=None, job_id=None): - from netbox_config_backup.utils import enqueue_if_needed - return enqueue_if_needed(backup, delay, job_id) - - @classmethod - def needs_enqueue(cls, backup, job_id=None): - from netbox_config_backup.utils import needs_enqueue - return needs_enqueue(backup, job_id) - - @classmethod - def is_running(cls, backup, job_id=None): - from netbox_config_backup.utils import is_running - return is_running(backup, job_id) - - @classmethod - def is_queued(cls, backup, job_id=None): - from netbox_config_backup.utils import is_queued - return is_queued(backup, job_id) - - @classmethod - def remove_orphaned(cls): - from netbox_config_backup.utils import remove_orphaned - return remove_orphaned() - - @classmethod - def remove_queued(cls, backup): - from netbox_config_backup.utils import remove_queued - return remove_queued() \ No newline at end of file + self.completed = timezone.now() \ No newline at end of file diff --git a/netbox_config_backup/navigation.py b/netbox_config_backup/navigation.py index 43ef357..bb3cf88 100644 --- a/netbox_config_backup/navigation.py +++ b/netbox_config_backup/navigation.py @@ -1,6 +1,13 @@ from netbox.choices import ButtonColorChoices from netbox.plugins import PluginMenuItem, PluginMenuButton, PluginMenu +jobs = PluginMenuItem( + link='plugins:netbox_config_backup:backupjob_list', + link_text='Jobs', + permissions=['netbox_config_backup.view_backups'], + buttons=[] +) + assigned = PluginMenuItem( link='plugins:netbox_config_backup:backup_list', link_text='Devices', @@ -25,6 +32,7 @@ menu = PluginMenu( label="Configuration Backup", groups=( - ('Backup Jobs', (assigned, unassigned)), + ('Backup Jobs', (jobs, )), + ('Backups', (assigned, unassigned)), ) ) diff --git a/netbox_config_backup/tables.py b/netbox_config_backup/tables.py index bc2fb8c..52eb3d6 100644 --- a/netbox_config_backup/tables.py +++ b/netbox_config_backup/tables.py @@ -27,6 +27,32 @@ def header(self): return '' +class 
# NOTE(review): BackupJob must be importable here — confirm the module's
# import block includes it (the diff did not add it to tables.py imports).
from netbox_config_backup.models import BackupJob


class BackupJobTable(BaseTable):
    """Table listing backup job runs (one row per BackupJob)."""

    pk = columns.ToggleColumn()
    backup = tables.Column(
        linkify=True,
        verbose_name='Backup'
    )
    created = tables.DateTimeColumn()
    scheduled = tables.DateTimeColumn()
    started = tables.DateTimeColumn()
    completed = tables.DateTimeColumn()

    class Meta(BaseTable.Meta):
        # BUG FIX: Meta.model pointed at Backup, but every listed field
        # (pid, scheduled, started, completed, status) lives on BackupJob.
        model = BackupJob
        fields = (
            'pk', 'backup', 'pid', 'created', 'scheduled', 'started', 'completed', 'status'
        )
        default_columns = (
            'pk', 'backup', 'pid', 'created', 'scheduled', 'started', 'completed', 'status'
        )
    # Dead code removed: render_backup_count had no matching 'backup_count'
    # column on this table.
optional_args.update(device.platform.napalm.napalm_args) - if extra_args != {}: - optional_args.update(extra_args) - - # Check for primary IP address from NetBox object - if ip is not None: - host = str(ip.address.ip) - elif device.primary_ip and device.primary_ip is not None: - host = str(device.primary_ip.address.ip) - else: - raise ServiceUnavailable("This device does not have a primary IP address") - - # Check that NAPALM is installed - try: - import napalm - from napalm.base.exceptions import ModuleImportError - except ModuleNotFoundError as e: - if getattr(e, 'name') == 'napalm': - raise ServiceUnavailable( - "NAPALM is not installed. Please see the documentation for instructions." - ) - raise e - - # Validate the configured driver - try: - driver = napalm.get_network_driver(device.platform.napalm.napalm_driver) - except ModuleImportError: - raise ServiceUnavailable( - "NAPALM driver for platform {} not found: {}.".format( - device.platform, device.platform.napalm.napalm_driver - ) - ) - - # Connect to the device - d = driver( - hostname=host, - username=username, - password=password, - timeout=timeout, - optional_args=optional_args, - ) - try: - d.open() - except Exception as e: - if isinstance(e, NetmikoAuthenticationException): - logger.info('Authentication error') - elif isinstance(e, NetmikoTimeoutException): - logger.info('Connection error') - raise ServiceUnavailable( - "Error connecting to the device at {}: {}".format(host, e) - ) - - return d - - -def backup_config(backup, pk=None): - commit = None - if backup.device: - ip = backup.ip if backup.ip is not None else backup.device.primary_ip - else: - ip = None - if not can_backup(backup): - raise Exception(f'Cannot backup {backup}') - - if backup.device is not None and ip is not None: - logger.info(f'{backup}: Backup started') - # logger.debug(f'[{pk}] Connecting') - d = napalm_init(backup.device, ip) - # logger.debug(f'[{pk} - - try: - status = check_config_save_status(d) - if status is not None: - if 
status and not backup.config_status: - backup.config_status = status - backup.save() - elif not status and backup.config_status: - backup.config_status = status - backup.save() - elif not status and backup.config_status is None: - backup.config_status = status - backup.save() - elif status and backup.config_status is None: - backup.config_status = status - backup.save() - except Exception as e: - logger.error(f'{backup}: had error setting backup status: {e}') - - # logger.debug(f'[{pk}] Getting config') - configs = d.get_config() - # logger.debug(f'[{pk}] Finished config get') - - # logger.debug(f'[{pk}] Setting config') - commit = backup.set_config(configs, pk=pk) - # logger.debug(f'[{pk}] Finished config set') - - d.close() - logger.info(f'{backup}: Backup complete') - else: - logger.info(f'{backup}: No IP set') - - return commit - - -def backup_job(pk): - import netmiko - - try: - job_result = BackupJob.objects.get(pk=pk) - except BackupJob.DoesNotExist: - logger.error(f'Cannot locate job (Id: {pk}) in DB') - raise Exception(f'Cannot locate job (Id: {pk}) in DB') - backup = job_result.backup - - if not can_backup(backup): - logger.warning(f'Cannot backup due to additional factors') - return 1 - delay = timedelta( - seconds=settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency') - ) - - job_result.started = timezone.now() - job_result.status = JobStatusChoices.STATUS_RUNNING - job_result.save() - try: - # logger.debug(f'[{pk}] Starting backup') - commit = backup_config(backup, pk=pk) - # logger.debug(f'[{pk}] Finished backup') - - job_result.set_status(JobStatusChoices.STATUS_COMPLETED) - job_result.data = {'commit': f'{commit}' if commit is not None else ''} - job_result.set_status(JobStatusChoices.STATUS_COMPLETED) - # Enqueue next job if one doesn't exist - try: - # logger.debug(f'[{pk}] Starting Enqueue') - BackupJob.objects.filter(backup=backup).exclude( - status__in=JobStatusChoices.TERMINAL_STATE_CHOICES - 
).update(status=JobStatusChoices.STATUS_FAILED) - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - # logger.debug(f'[{pk}] Finished Enqueue') - except Exception as e: - logger.error(f'Job Enqueue after completion failed for job: {backup}') - logger.error(f'\tException: {e}') - except netmiko.exceptions.ReadTimeout as e: - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - logger.warning(f'Netmiko read timeout on job: {backup}') - except ServiceUnavailable as e: - logger.info(f'Napalm service read failure on job: {backup} ({e})') - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - except Exception as e: - logger.error(f'Uncaught Exception on job: {backup}') - logger.error(e) - logger.warning(traceback.format_exc()) - job_result.set_status(JobStatusChoices.STATUS_ERRORED) - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - - # logger.debug(f'[{pk}] Saving result') - job_result.save() - - -logger = get_logger() diff --git a/netbox_config_backup/template_content.py b/netbox_config_backup/template_content.py index e504487..43107e3 100644 --- a/netbox_config_backup/template_content.py +++ b/netbox_config_backup/template_content.py @@ -31,10 +31,6 @@ def build_table(instance): instance = devices.first() table = build_table(instance) - if BackupJob.is_queued(instance) is False: - logger.debug(f'{instance}: Queuing Job') - BackupJob.enqueue(instance) - if htmx_partial(request): return self.render('htmx/table.html', extra_context={ 'object': instance, diff --git a/netbox_config_backup/templates/netbox_config_backup/backupjob.html b/netbox_config_backup/templates/netbox_config_backup/backupjob.html new file mode 100644 index 0000000..07505c0 --- /dev/null +++ b/netbox_config_backup/templates/netbox_config_backup/backupjob.html @@ -0,0 +1,34 @@ +{% extends 'generic/object.html' %} +{% load helpers %} + +{% block subtitle %} +
+{% endblock %} + +{% block content %} +
+
+
+
+ Backup Job +
+
+ + + + + + + + + +
Backup{{ object.backup|linkify }}
Status{{ object.status }}
+
+
+ {% include 'inc/panels/comments.html' %} +
+ +
+
+
+{% endblock %} \ No newline at end of file diff --git a/netbox_config_backup/urls.py b/netbox_config_backup/urls.py index 9394638..a1e69da 100644 --- a/netbox_config_backup/urls.py +++ b/netbox_config_backup/urls.py @@ -5,6 +5,7 @@ from .models import Backup urlpatterns = [ + path('jobs/', views.BackupJobListView.as_view(), name='backupjob_list'), path('unassigned/', views.UnassignedBackupListView.as_view(), name='unassignedbackup_list'), path('devices/', views.BackupListView.as_view(), name='backup_list'), path('devices/add/', views.BackupEditView.as_view(), name='backup_add'), diff --git a/netbox_config_backup/utils/__init__.py b/netbox_config_backup/utils/__init__.py index a171750..a6de2f6 100644 --- a/netbox_config_backup/utils/__init__.py +++ b/netbox_config_backup/utils/__init__.py @@ -1,15 +1,7 @@ from .backups import get_backup_tables from .git import Differ -from .rq import enqueue_if_needed, enqueue, needs_enqueue, remove_queued, remove_orphaned, is_queued, is_running __all__ = ( 'get_backup_tables', 'Differ', - 'enqueue', - 'enqueue_if_needed', - 'needs_enqueue', - 'remove_queued', - 'remove_orphaned', - 'is_running', - 'is_queued', ) \ No newline at end of file diff --git a/netbox_config_backup/utils/configs.py b/netbox_config_backup/utils/configs.py index 336a386..293279c 100644 --- a/netbox_config_backup/utils/configs.py +++ b/netbox_config_backup/utils/configs.py @@ -6,7 +6,7 @@ def check_config_save_status(d): - logger.info(f'Switch: {d.hostname}') + logger.debug(f'Switch: {d.hostname}') platform = { 'ios': { 'running': { diff --git a/netbox_config_backup/utils/db.py b/netbox_config_backup/utils/db.py new file mode 100644 index 0000000..e560b15 --- /dev/null +++ b/netbox_config_backup/utils/db.py @@ -0,0 +1,5 @@ +from django import db + + +def close_db(): + db.connections.close_all() diff --git a/netbox_config_backup/utils/napalm.py b/netbox_config_backup/utils/napalm.py new file mode 100644 index 0000000..bdca7b8 --- /dev/null +++ 
b/netbox_config_backup/utils/napalm.py @@ -0,0 +1,80 @@ +import logging +from netmiko import NetmikoAuthenticationException, NetmikoTimeoutException + +from netbox.api.exceptions import ServiceUnavailable + +logger = logging.getLogger(f"netbox_config_backup") + + +def napalm_init(device, ip=None, extra_args={}): + from netbox import settings + + username = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_USERNAME', None + ) + password = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_PASSWORD', None + ) + timeout = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_TIMEOUT', None + ) + optional_args = ( + settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}) + .get('NAPALM_ARGS', []) + .copy() + ) + + if device and device.platform and device.platform.napalm.napalm_args is not None: + optional_args.update(device.platform.napalm.napalm_args) + if extra_args != {}: + optional_args.update(extra_args) + + # Check for primary IP address from NetBox object + if ip is not None: + host = str(ip.address.ip) + elif device.primary_ip and device.primary_ip is not None: + host = str(device.primary_ip.address.ip) + else: + raise ServiceUnavailable("This device does not have a primary IP address") + + # Check that NAPALM is installed + try: + import napalm + from napalm.base.exceptions import ModuleImportError + except ModuleNotFoundError as e: + if getattr(e, 'name') == 'napalm': + raise ServiceUnavailable( + "NAPALM is not installed. Please see the documentation for instructions." 
+ ) + raise e + + # Validate the configured driver + try: + driver = napalm.get_network_driver(device.platform.napalm.napalm_driver) + except ModuleImportError: + raise ServiceUnavailable( + "NAPALM driver for platform {} not found: {}.".format( + device.platform, device.platform.napalm.napalm_driver + ) + ) + + # Connect to the device + d = driver( + hostname=host, + username=username, + password=password, + timeout=timeout, + optional_args=optional_args, + ) + try: + d.open() + except Exception as e: + if isinstance(e, NetmikoAuthenticationException): + logger.info('Authentication error') + elif isinstance(e, NetmikoTimeoutException): + logger.info('Connection error') + raise ServiceUnavailable( + "Error connecting to the device at {}: {}".format(host, e) + ) + + return d diff --git a/netbox_config_backup/utils/rq.py b/netbox_config_backup/utils/rq.py index 80fc4a6..8a801e1 100644 --- a/netbox_config_backup/utils/rq.py +++ b/netbox_config_backup/utils/rq.py @@ -43,163 +43,3 @@ def can_backup(backup): return False return True - -def enqueue(backup, delay=None): - from netbox_config_backup.models import BackupJob - - scheduled = timezone.now() - if delay is not None: - logger.info(f'{backup}: Scheduling for: {scheduled + delay} at {scheduled} ') - scheduled = timezone.now() + delay - - result = BackupJob.objects.create( - backup=backup, - job_id=uuid.uuid4(), - scheduled=scheduled, - ) - queue = get_queue('netbox_config_backup.jobs') - if delay is None: - logger.debug(f'{backup}: Enqueued') - job = queue.enqueue( - 'netbox_config_backup.tasks.backup_job', - description=f'{backup.uuid}', - job_id=str(result.job_id), - pk=result.pk, - ) - logger.info(f'{backup}: {result.job_id}') - else: - logger.debug(f'{backup}: Enqueued') - job = queue.enqueue_in( - delay, - 'netbox_config_backup.tasks.backup_job', - description=f'{backup.uuid}', - job_id=str(result.job_id), - pk=result.pk - ) - logger.info(f'{backup}: {result.job_id}') - - return job - - -def 
enqueue_if_needed(backup, delay=None, job_id=None): - if needs_enqueue(backup, job_id=job_id): - return enqueue(backup, delay=delay) - return False - - -def needs_enqueue(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - scheduled = queue.scheduled_job_registry - started = queue.started_job_registry - - scheduled_jobs = scheduled.get_job_ids() - started_jobs = started.get_job_ids() - - if not can_backup(backup): - return False - elif is_queued(backup, job_id): - logger.info(f'Backup already queued for {backup}') - return False - - return True - - -def is_running(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - - jobs = backup.jobs.all() - queued = jobs.filter(status__in=[JobStatusChoices.STATUS_RUNNING]) - - if job_id is not None: - queued.exclude(job_id=job_id) - - for backupjob in queued.all(): - job = queue.fetch_job(f'{backupjob.job_id}') - if job and job.is_started and job.id in queue.started_job_registry.get_job_ids() + queue.get_job_ids(): - return True - elif job and job.is_started and job.id not in queue.started_job_registry.get_job_ids(): - status = { - 'is_canceled': job.is_canceled, - 'is_deferred': job.is_deferred, - 'is_failed': job.is_failed, - 'is_finished': job.is_finished, - 'is_queued': job.is_queued, - 'is_scheduled': job.is_scheduled, - 'is_started': job.is_started, - 'is_stopped': job.is_stopped, - } - job.cancel() - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - logger.warning(f'{backup}: Job in started queue but not in a registry, cancelling {status}') - elif job and job.is_canceled: - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - return False - - -def get_scheduled(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - - scheduled_jobs = queue.scheduled_job_registry.get_job_ids() - started_jobs = queue.started_job_registry.get_job_ids() - queued_jobs = queue.get_job_ids() - - jobs = backup.jobs.all() - queued = 
jobs.filter(status__in=[JobStatusChoices.STATUS_RUNNING, JobStatusChoices.STATUS_PENDING]) - - if job_id is not None: - queued.exclude(job_id=job_id) - - for backupjob in queued.all(): - job = queue.fetch_job(f'{backupjob.job_id}') - if job and (job.is_scheduled or job.is_queued) and job.id in scheduled_jobs + started_jobs + queued_jobs: - if job.enqueued_at is not None: - return job.enqueued_at - else: - return queue.scheduled_job_registry.get_scheduled_time(job) - elif job and (job.is_scheduled or job.is_queued) and job.id not in scheduled_jobs + started_jobs: - status = { - 'is_canceled': job.is_canceled, - 'is_deferred': job.is_deferred, - 'is_failed': job.is_failed, - 'is_finished': job.is_finished, - 'is_queued': job.is_queued, - 'is_scheduled': job.is_scheduled, - 'is_started': job.is_started, - 'is_stopped': job.is_stopped, - } - job.cancel() - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - logger.warning(f'{backup}: Job in scheduled or started queue but not in a registry, cancelling {status} {scheduled_jobs + started_jobs + queued_jobs}') - elif job and job.is_canceled: - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - return None - - -def is_queued(backup, job_id=None): - if get_scheduled(backup, job_id) is not None: - return True - return False - - -def remove_orphaned(): - queue = get_queue('netbox_config_backup.jobs') - registry = ScheduledJobRegistry(queue=queue) - - for job_id in registry.get_job_ids(): - try: - BackupJob.objects.get(job_id=job_id) - except BackupJob.DoesNotExist: - registry.remove(job_id) - - -def remove_queued(backup): - queue = get_queue('netbox_config_backup.jobs') - registry = ScheduledJobRegistry(queue=queue) - for job_id in registry.get_job_ids(): - job = queue.fetch_job(f'{job_id}') - if job.description == f'{backup.uuid}': - registry.remove(f'{job_id}') diff --git a/netbox_config_backup/views.py b/netbox_config_backup/views.py index 6109d51..428a57b 100644 --- 
a/netbox_config_backup/views.py +++ b/netbox_config_backup/views.py @@ -9,18 +9,27 @@ from core.choices import JobStatusChoices from netbox.views.generic import ObjectDeleteView, ObjectEditView, ObjectView, ObjectListView, ObjectChildrenView, \ BulkEditView, BulkDeleteView -from netbox_config_backup.filtersets import BackupFilterSet, BackupsFilterSet +from netbox_config_backup.filtersets import BackupFilterSet, BackupsFilterSet, BackupJobFilterSet -from netbox_config_backup.forms import BackupForm, BackupFilterSetForm, BackupBulkEditForm +from netbox_config_backup.forms import BackupForm, BackupFilterSetForm, BackupBulkEditForm, BackupJobFilterSetForm from netbox_config_backup.git import GitBackup from netbox_config_backup.models import Backup, BackupJob, BackupCommitTreeChange, BackupCommit, BackupObject -from netbox_config_backup.tables import BackupTable, BackupsTable +from netbox_config_backup.tables import BackupTable, BackupsTable, BackupJobTable from netbox_config_backup.utils import get_backup_tables, Differ from utilities.views import register_model_view, ViewTab logger = logging.getLogger(f"netbox_config_backup") +class BackupJobListView(ObjectListView): + queryset = BackupJob.objects.all() + + filterset = BackupJobFilterSet + filterset_form = BackupJobFilterSetForm + table = BackupJobTable + action_buttons = () + + class BackupListView(ObjectListView): queryset = Backup.objects.filter(device__isnull=False).default_annotate() @@ -46,10 +55,6 @@ class BackupView(ObjectView): def get_extra_context(self, request, instance): - if BackupJob.is_queued(instance) is False: - logger.debug(f'{instance}: Queuing Job') - BackupJob.enqueue_if_needed(instance) - jobs = BackupJob.objects.filter(backup=instance).order_by() is_running = True if jobs.filter(status=JobStatusChoices.STATUS_RUNNING).count() > 0 else False is_pending = True if jobs.filter(status=JobStatusChoices.STATUS_PENDING).count() > 0 else False @@ -62,7 +67,6 @@ def get_extra_context(self, request, 
instance): status = { 'status': job_status, - 'scheduled': BackupJob.is_queued(instance), 'next_attempt': instance.next_attempt, 'last_job': instance.jobs.filter(completed__isnull=False).last(), 'last_success': instance.last_backup, diff --git a/pyproject.toml b/pyproject.toml index b0e759a..2055e1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ description = "A NetBox Switch Configuration Backup Plugin" readme = "README.md" requires-python = ">=3.10" keywords = ["netbox-plugin", ] -version = "2.1.1" +version = "2.1.2-alpha1" license = {file = "LICENSE"} classifiers = [ "Programming Language :: Python :: 3",