Skip to content

Commit

Permalink
archive: add plugin interface
Browse files Browse the repository at this point in the history
Extend the archive command so that it can also operate on (scan, clean
up, search) a non-local archive through a custom plugin. To enable
this, the plugin manifest must contain an 'archiveAccessors' dictionary.
Each entry maps an accessor name to an instance of a class derived from
'BaseArchiveAccess' that provides the methods needed to access the
custom archive.

Fixes BobBuildTool#340.
  • Loading branch information
rhubert committed Jul 29, 2022
1 parent 5008f01 commit e0709d5
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 15 deletions.
73 changes: 73 additions & 0 deletions contrib/plugins/artifactoryArchiveAccess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from bob.archive_access import BaseArchiveAccess

from artifactory import ArtifactoryPath
import os
import tempfile
import datetime
import calendar
import struct

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

class Artifactory(BaseArchiveAccess):
    """Archive accessor for a Bob binary archive stored on an Artifactory server.

    All paths handed to the methods are interpreted relative to the base URL
    configured in ``__init__``.  Every request is made with ``verify=False``,
    i.e. without TLS certificate verification.
    """

    def __init__(self):
        self.__url = "https://artifactory/bobs_cache"
        print("Using Artifactory Archive @ " + self.__url)

    def __location(self, path):
        """Return the URL of 'path' below the archive root.

        Exactly one '/' is placed between the base URL and 'path', no matter
        whether the base URL ends with a slash or 'path' starts with one.
        Plain concatenation would glue a relative path such as "ab/cd" right
        onto the last URL component.
        """
        return self.__url.rstrip("/") + "/" + path.lstrip("/")

    def __artifact(self, path):
        """Return the ArtifactoryPath object addressing 'path'."""
        return ArtifactoryPath(self.__location(path), verify=False)

    def get(self, path):
        """Download 'path' into a temporary file and return that file's name.

        Download errors are logged rather than raised.  In that case the
        returned file is empty or incomplete.  The caller must hand the
        returned name to removeTmp() once it is done with the file.
        """
        # Imported here because the module header does not import them; the
        # error path would otherwise fail with a NameError.
        import logging
        import traceback

        # The 'with' closes the temporary file on every exit path; because of
        # delete=False the file itself stays on disk until removeTmp().
        with tempfile.NamedTemporaryFile("wb", delete=False) as out:
            try:
                with self.__artifact(path).open() as fd:
                    out.write(fd.read())
            except Exception:
                logging.error(traceback.format_exc())
        return out.name

    def removeTmp(self, tmp):
        """Delete the temporary file 'tmp' created by get().

        'None' and already deleted files are silently ignored.
        """
        if tmp is None:
            return
        try:
            os.unlink(tmp)
        except FileNotFoundError:
            pass

    def listdir(self, path):
        """Return the entries below 'path' as strings relative to 'path'.

        The special path "." denotes the archive root.
        """
        base = self.__location("" if path == "." else path)
        if not base.endswith("/"):
            base += "/"
        # Strip the directory URL so that only the entry names remain.
        return [str(p).replace(base, "") for p in ArtifactoryPath(base, verify=False)]

    def binStat(self, path):
        """Return a packed binary stat record for 'path'.

        The record holds creation time, modification time, size and the
        SHA-256 checksum string as reported by the server.
        """
        stat = self.__artifact(path).stat()
        # NOTE(review): timetuple() ignores a UTC offset if the timestamps are
        # timezone-aware - confirm that utctimetuple() is not needed here.
        ctime = calendar.timegm(stat.ctime.timetuple())
        mtime = calendar.timegm(stat.mtime.timetuple())
        return struct.pack('=qqQ64s', ctime, mtime, stat.size, bytes(stat.sha256, 'utf-8'))

    def unlink(self, path):
        """Remove 'path' from the archive; a missing artifact is not an error."""
        archive = self.__artifact(path)
        if archive.exists():
            archive.unlink()

    def getSize(self,path):
        """Return the size of 'path' in bytes.

        Raises FileNotFoundError if the artifact does not exist.  Returning
        None instead would break callers that sum up the sizes.
        """
        archive = self.__artifact(path)
        if not archive.exists():
            raise FileNotFoundError("No such artifact: " + path)
        return archive.stat().size

# One accessor instance, created when the plugin module is loaded.
ArtifactoryAccess = Artifactory()

# Plugin manifest: publishes the accessor instance under the name
# 'Artifactory' in the 'archiveAccessors' dictionary.
manifest = {
    'apiVersion': "0.21",
    'archiveAccessors': {'Artifactory': ArtifactoryAccess},
}
22 changes: 22 additions & 0 deletions pym/bob/archive_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class BaseArchiveAccess:
    """Interface of the archive access handlers.

    The implementations in this base class do nothing: get() returns an
    empty string and every other method returns None.
    """

    def get(self, path):
        """Fetch the package 'path' from the archive.

        Returns the path of a locally accessible archive file.
        """
        return ""

    def removeTmp(self, path):
        """Remove the temporary file that was returned by get()."""

    def listdir(self, path):
        """Return a list of the directory entries of 'path'."""

    def getSize(self,path):
        """Return the size of 'path' in bytes."""

    def unlink(self, path):
        """Remove 'path' from the archive."""

    def binStat(self, path):
        """Return the binary stat of 'path'."""
72 changes: 57 additions & 15 deletions pym/bob/cmds/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from ..audit import Audit
from ..errors import BobError
from ..input import RecipeSet
from ..utils import binStat, asHexStr, infixBinaryOp
from ..archive_access import BaseArchiveAccess
import argparse
import gzip
import json
Expand All @@ -20,14 +22,29 @@
# need to enable this for nested expression parsing performance
pyparsing.ParserElement.enablePackrat()

class LocalAccessor(BaseArchiveAccess):
    """Accessor for an archive that lives in the local file system.

    Paths are passed unchanged to the corresponding file system functions.
    """

    def get(self, path):
        """Return 'path' itself; a local file needs no download."""
        return path

    def removeTmp(self, path):
        """Do nothing, because get() never creates a temporary file."""
        return None

    def listdir(self, path):
        """Return the names of the entries of directory 'path'."""
        entries = os.listdir(path)
        return entries

    def getSize(self,path):
        """Return the size of the file 'path' in bytes."""
        info = os.stat(path)
        return info.st_size

    def unlink(self, path):
        """Delete the file 'path'."""
        os.unlink(path)

    def binStat(self, path):
        """Return the result of the module level binStat() helper for 'path'."""
        packed = binStat(path)
        return packed

class ArchiveScanner:
CUR_VERSION = 2

def __init__(self):
def __init__(self, accessor):
self.__dirSchema = re.compile(r'[0-9a-zA-Z]{2}')
self.__archiveSchema = re.compile(r'[0-9a-zA-Z]{36,}-1.tgz')
self.__db = None
self.__cleanup = False
self.__accessor = accessor

def __enter__(self):
try:
Expand Down Expand Up @@ -80,18 +97,20 @@ def scan(self, verbose):
try:
found = False
self.__db.execute("BEGIN")
for l1 in os.listdir("."):
for l1 in self.__accessor.listdir("."):
if not self.__dirSchema.fullmatch(l1): continue
for l2 in os.listdir(l1):
for l2 in self.__accessor.listdir(l1):
if not self.__dirSchema.fullmatch(l2): continue
l2 = os.path.join(l1, l2)
for l3 in os.listdir(l2):
for l3 in self.__accessor.listdir(l2):
m = self.__archiveSchema.fullmatch(l3)
if not m: continue
found = True
self.__scan(os.path.join(l2, l3), verbose)
except OSError as e:
raise BobError("Error scanning archive: " + str(e))
except Exception as e:
raise BobError("Error: " + str(e))
finally:
self.__db.execute("END")
if verbose and not found:
Expand All @@ -101,8 +120,9 @@ def scan(self, verbose):
return found

def __scan(self, fileName, verbose):
tmpFileName = None
try:
st = binStat(fileName)
st = self.__accessor.binStat(fileName)
bidHex, sep, suffix = fileName.partition("-")
bid = bytes.fromhex(bidHex[0:2] + bidHex[3:5] + bidHex[6:])

Expand All @@ -116,9 +136,10 @@ def __scan(self, fileName, verbose):
self.__db.execute("DELETE FROM files WHERE bid=?",
(bid,))

tmpFileName = self.__accessor.get(fileName)
# read audit trail
if verbose: print("scan", fileName)
with tarfile.open(fileName, errorlevel=1) as tar:
with tarfile.open(tmpFileName, errorlevel=1) as tar:
# validate
if tar.pax_headers.get('bob-archive-vsn') != "1":
print("Not a Bob archive:", fileName, "Ignored!")
Expand All @@ -135,7 +156,7 @@ def __scan(self, fileName, verbose):
# read audit trail
auditJsonGz = tar.extractfile(f)
auditJson = gzip.GzipFile(fileobj=auditJsonGz)
audit = Audit.fromByteStream(auditJson, fileName)
audit = Audit.fromByteStream(auditJson, tmpFileName)

# import data
artifact = audit.getArtifact()
Expand All @@ -152,6 +173,10 @@ def __scan(self, fileName, verbose):
raise BobError("Cannot read {}: {}".format(fileName, str(e)))
except OSError as e:
raise BobError(str(e))
except Exception as e:
raise BobError("Error: " + str(e))
finally:
self.__accessor.removeTmp(tmpFileName)

def remove(self, bid):
self.__cleanup = True
Expand Down Expand Up @@ -386,22 +411,22 @@ def query(scanner, expressions):
return retained


def doArchiveScan(argv):
def doArchiveScan(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive scan")
parser.add_argument("-v", "--verbose", action='store_true',
help="Verbose operation")
parser.add_argument("-f", "--fail", action='store_true',
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not scanner.scan(args.verbose) and args.fail:
sys.exit(1)


# meta.package == "root" && build.date > "2017-06-19" LIMIT 5 ORDER BY build.date ASC
def doArchiveClean(argv):
def doArchiveClean(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive clean")
parser.add_argument('expression', nargs='+',
help="Expression of artifacts that shall be kept")
Expand All @@ -415,7 +440,7 @@ def doArchiveClean(argv):
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not args.noscan:
if not scanner.scan(args.verbose) and args.fail:
Expand All @@ -435,24 +460,29 @@ def doArchiveClean(argv):
todo.update(scanner.getReferencedBuildIds(n))

# Third pass: remove everything that is *not* retained
totalRemoved = 0
for bid in scanner.getBuildIds():
if bid in retained: continue
victim = asHexStr(bid)
victim = os.path.join(victim[0:2], victim[2:4], victim[4:] + "-1.tgz")
if args.dry_run:
print(victim)
totalRemoved += accessor.getSize(victim)
else:
try:
if args.verbose:
print("rm", victim)
os.unlink(victim)
totalRemoved += accessor.getSize(victim)
accessor.unlink(victim)
except FileNotFoundError:
pass
except OSError as e:
raise BobError("Cannot remove {}: {}".format(victim, str(e)))
scanner.remove(bid)
print("{} {} Bytes from archive".format ("Would remove " if args.dry_run else "Removed",
totalRemoved))

def doArchiveFind(argv):
def doArchiveFind(accessor, argv):
parser = argparse.ArgumentParser(prog="bob archive find")
parser.add_argument('expression', nargs='+',
help="Expression that artifacts need to match")
Expand All @@ -464,7 +494,7 @@ def doArchiveFind(argv):
help="Return a non-zero error code in case of errors")
args = parser.parse_args(argv)

scanner = ArchiveScanner()
scanner = ArchiveScanner(accessor)
with scanner:
if not args.noscan:
if not scanner.scan(args.verbose) and args.fail:
Expand Down Expand Up @@ -492,14 +522,26 @@ def doArchive(argv, bobRoot):
bob archive {}
""".format(subHelp))
parser.add_argument('-a', '--accessor', nargs='?', default=None, help="Archive Accessor (plugin)")
parser.add_argument('subcommand', help="Subcommand")
parser.add_argument('args', nargs=argparse.REMAINDER,
help="Arguments for subcommand")

args = parser.parse_args(argv)

if args.accessor:
recipes = RecipeSet()
recipes.parse()
accessors = recipes.getArchiveAccessors()
if not args.accessor in accessors:
parser.error("Unknown archive accessor '{}'".format(args.accessor))
accessor = accessors[args.accessor]
else:
print("Using local access")
accessor = LocalAccessor()

if args.subcommand in availableArchiveCmds:
availableArchiveCmds[args.subcommand][0](args.args)
availableArchiveCmds[args.subcommand][0](accessor, args.args)
else:
parser.error("Unknown subcommand '{}'".format(args.subcommand))

10 changes: 10 additions & 0 deletions pym/bob/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2897,6 +2897,7 @@ def __init__(self):
self.__scmOverrides = []
self.__hooks = {}
self.__projectGenerators = {}
self.__archiveAccessors = {}
self.__configFiles = []
self.__properties = {}
self.__states = {}
Expand Down Expand Up @@ -3157,6 +3158,12 @@ def __loadPlugin(self, mangledName, fileName, name):
}
self.__projectGenerators.update(projectGenerators)

archiveAccessors = manifest.get('archiveAccessors', {})
if not isinstance(archiveAccessors, dict):
raise ParseError("Plugin '"+fileName+"': 'archiveAccessor' has wrong type!")
if archiveAccessors:
self.__archiveAccessors.update(archiveAccessors)

properties = manifest.get('properties', {})
if not isinstance(properties, dict):
raise ParseError("Plugin '"+fileName+"': 'properties' has wrong type!")
Expand Down Expand Up @@ -3225,6 +3232,9 @@ def defineHook(self, name, value):
def setConfigFiles(self, configFiles):
self.__configFiles = configFiles

def getArchiveAccessors (self):
    """Return the archive accessors collected from the plugin manifests.

    The internal dictionary itself is returned, not a copy.
    """
    return self.__archiveAccessors

def getCommandConfig(self):
return self.__commandConfig

Expand Down

0 comments on commit e0709d5

Please sign in to comment.