Skip to content

Commit

Permalink
Merge pull request #449 from citrus-it/pkgdepend
Browse files Browse the repository at this point in the history
pkgdepend resolve uses too much memory
  • Loading branch information
hadfl authored Sep 29, 2023
2 parents 5d3b531 + 343f856 commit 5b0d787
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 56 deletions.
56 changes: 32 additions & 24 deletions src/modules/portable/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,40 @@
# CDDL HEADER END
#
# Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
# Copyright 2023 OmniOS Community Edition (OmniOSce) Association.
#

# The portable module provides access to methods that require operating system-
# specific implementations. The module initialization logic selects the right
# implementation when the module is loaded. The module methods then
# delegate to the implementation class object.
# delegate to the implementation class object.
#
# The documentation for the methods is provided in this module. To support
# another operating system, each of these methods must be implemented by the
# class for that operating system even if it is effectively a no-op.
# class for that operating system even if it is effectively a no-op.
#
# The module and class must be named using os_[impl], where
# [impl] corresponds to the OS distro, name, or type of OS
# the class implements. For example, to add specific support
# for mandrake linux (above and beyond existing support for
# generic unix), one would create os_mandrake.py.
#
#
# The following high-level groups of methods are defined in this module:
#
#
# - Platform Attribute Methods: These methods give access to
# attributes of the underlying platform not available through
# existing python libraries. For example, the list of implemented
# ISAs of a given platform.
#
#
# - Account access: Retrieval of account information (users and
# groups), in some cases for dormant, relocated OS images.
#
#
# - Miscellaneous filesystem operations: common operations that
# differ in implementation or are only available on a subset
# of OS or filesystem implementations, such as chown() or rename().
# of OS or filesystem implementations, such as chown() or rename().

# This module exports the methods defined below. They are defined here as
# not implemented to avoid pylint errors. The included OS-specific module
# This module exports the methods defined below. They are defined here as
# not implemented to avoid pylint errors. The included OS-specific module
# redefines the methods with an OS-specific implementation.

# Platform Methods
Expand All @@ -67,31 +68,37 @@ def get_release():
must be a dot-separated set of integers (i.e. no alphabetic
or punctuation)."""
raise NotImplementedError

def get_platform():
    """ Return the hardware model of the current system as a string,
    for example "i86pc".

    This definition is a placeholder; the OS-specific module that is
    loaded by this package redefines it with a real implementation."""
    raise NotImplementedError

def get_file_type(actions):
""" Return a list containing the file type for each file in paths."""
def get_file_type(path):
""" Return a value indicating the type of file found at path.
The return value is one of file type constants defined below."""
raise NotImplementedError

def get_actions_file_type(actions):
    """ Return an iterator or list holding one file type per entry in
    actions, the list of provided actions.

    This definition is a placeholder; the OS-specific module that is
    loaded by this package redefines it with a real implementation."""
    raise NotImplementedError

# Account access
# --------------
def get_group_by_name(name, dirpath, use_file):
""" Return the group ID for a group name.
If use_file is true, an OS-specific file from within the file tree
rooted by dirpath will be consulted, if it exists. Otherwise, the
rooted by dirpath will be consulted, if it exists. Otherwise, the
group ID is retrieved from the operating system.
Exceptions:
Exceptions:
KeyError if the specified group does not exist"""
raise NotImplementedError

def get_user_by_name(name, dirpath, use_file):
""" Return the user ID for a user name.
If use_file is true, an OS-specific file from within the file tree
rooted by dirpath will be consulted, if it exists. Otherwise, the
rooted by dirpath will be consulted, if it exists. Otherwise, the
user ID is retrieved from the operating system.
Exceptions:
KeyError if the specified user does not exist"""
Expand All @@ -100,7 +107,7 @@ def get_user_by_name(name, dirpath, use_file):
def get_name_by_gid(gid, dirpath, use_file):
""" Return the group name for a group ID.
If use_file is true, an OS-specific file from within the file tree
rooted by dirpath will be consulted, if it exists. Otherwise, the
rooted by dirpath will be consulted, if it exists. Otherwise, the
group name is retrieved from the operating system.
Exceptions:
KeyError if the specified group does not exist"""
Expand All @@ -109,7 +116,7 @@ def get_name_by_gid(gid, dirpath, use_file):
def get_name_by_uid(uid, dirpath, use_file):
""" Return the user name for a user ID.
If use_file is true, an OS-specific file from within the file tree
rooted by dirpath will be consulted, if it exists. Otherwise, the
rooted by dirpath will be consulted, if it exists. Otherwise, the
user name is retrieved from the operating system.
Exceptions:
KeyError if the specified user does not exist"""
Expand Down Expand Up @@ -144,7 +151,7 @@ def chown(path, owner, group):
""" Change ownership of a file in an OS-specific way.
The owner and group ownership information should be applied to
the given file, if applicable on the current runtime OS.
Exceptions:
Exceptions:
EnvironmentError (or subclass) if the path does not exist
or ownership cannot be changed"""
raise NotImplementedError
Expand All @@ -167,7 +174,7 @@ def link(src, dst):
def remove(path):
""" Remove the given file in an OS-specific way
Exceptions:
OSError (or subclass) if the source path does not exist or
OSError (or subclass) if the source path does not exist or
the file cannot be removed"""
raise NotImplementedError

Expand All @@ -183,7 +190,7 @@ def copyfile(src, dst):
raise NotImplementedError

def split_path(path):
""" Splits a path and gives back the components of the path.
""" Splits a path and gives back the components of the path.
This is intended to hide platform-specific details about splitting
a path into its components. This interface is similar to
os.path.split() except that the entire path is split, not just
Expand All @@ -195,7 +202,7 @@ def split_path(path):
raise NotImplementedError

def get_root(path):
""" Returns the 'root' of the given path.
""" Returns the 'root' of the given path.
This should include any and all components of a path up to the first
non-platform-specific component. For example, on Windows,
it should include the drive letter prefix.
Expand All @@ -208,7 +215,7 @@ def get_root(path):
def assert_mode(path, mode):
""" Checks that the file identified by path has the given mode to
the extent possible by the host operating system. Otherwise raises
an AssertionError where the mode attribute of the assertion is the
an AssertionError where the mode attribute of the assertion is the
mode of the file."""
raise NotImplementedError

Expand All @@ -231,7 +238,8 @@ def get_sysattr_dict():

# File type constants
# -------------------
ELF, EXEC, UNFOUND, SMF_MANIFEST = range(0, 4)
ELF, EXEC, UNFOUND, SMF_MANIFEST, XMLDOC, EMPTYFILE, NOTFILE, UNKNOWN = \
range(0, 8)

# String to be used for an action attribute created for the internal use of
# dependency analysis.
Expand Down Expand Up @@ -267,7 +275,7 @@ def get_sysattr_dict():

# try the most-specific module name first (e.g. os_suse),
# then try the more generic OS Name module (e.g. os_linux),
# then the OS type module (e.g. os_unix)
# then the OS type module (e.g. os_unix)
try:
exec('from .{0} import *'.format(modname))
break
Expand Down
61 changes: 36 additions & 25 deletions src/modules/portable/os_sunos.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
get_group_by_name, get_user_by_name, get_name_by_gid, get_name_by_uid, \
get_usernames_by_gid, is_admin, get_userid, get_username, chown, rename, \
remove, link, copyfile, split_path, get_root, assert_mode
from pkg.portable import ELF, EXEC, PD_LOCAL_PATH, UNFOUND, SMF_MANIFEST
from pkg.portable import PD_LOCAL_PATH, \
ELF, EXEC, UNFOUND, SMF_MANIFEST, XMLDOC, EMPTYFILE, NOTFILE, UNKNOWN

import pkg.arch as arch
from pkg.sysattr import fgetattr, fsetattr
Expand All @@ -54,32 +55,42 @@ def get_release():
def get_platform():
    """ Return the platform value reported by pkg.arch."""
    platform = arch.get_platform()
    return platform

def get_file_type(actions):
def get_file_type(path):
from pkg.flavor.smf_manifest import is_smf_manifest
for a in actions:
lpath = a.attrs[PD_LOCAL_PATH]
if os.stat(lpath).st_size == 0:
# Some tests rely on this being identified
yield "empty file"
continue
try:
with open(lpath, 'rb') as f:
magic = f.read(4)
except FileNotFoundError:
yield UNFOUND
continue
if magic == b'\x7fELF':
yield ELF
elif magic[:2] == b'#!':
yield EXEC
elif lpath.endswith('.xml'):
if is_smf_manifest(lpath):
yield SMF_MANIFEST
else:
# Some tests rely on this type being identified
yield "XML document"

if not os.path.isfile(path):
return NOTFILE

try:
# Some tests rely on this being identified
if os.stat(path).st_size == 0:
return EMPTYFILE
with open(path, 'rb') as f:
magic = f.read(4)
except FileNotFoundError:
return UNFOUND
except OSError:
# Most likely EPERM
return UNKNOWN

if magic == b'\x7fELF':
return ELF

if magic[:2] == b'#!':
return EXEC

if path.endswith('.xml'):
if is_smf_manifest(path):
return SMF_MANIFEST
else:
yield "unknown"
# Some tests rely on this type being identified
return XMLDOC

return UNKNOWN

def get_actions_file_type(actions):
    """ Lazily yield the file type of each action in actions.

    The file inspected for an action is the one named by the action's
    PD_LOCAL_PATH attribute; its type is whatever get_file_type()
    returns for that path."""
    for action in actions:
        local_path = action.attrs[PD_LOCAL_PATH]
        yield get_file_type(local_path)

# Vim hints
# vim:ts=8:sw=8:et:fdm=marker
45 changes: 44 additions & 1 deletion src/modules/publish/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def list_implicit_deps_for_manifest(mfst, proto_dirs, pkg_vars, dyn_tok_conv,
warnings = []
pkg_attrs = {}
act_list = list(mfst.gen_actions_by_type("file"))
file_types = portable.get_file_type(act_list)
file_types = portable.get_actions_file_type(act_list)
var_dict = dict()

# Collect all variants that are used and not declared and emit a warning
Expand Down Expand Up @@ -1616,6 +1616,38 @@ def prune_debug_attrs(action):
if not k.startswith(base.Dependency.DEPEND_DEBUG_PREFIX))
return actions.depend.DependencyAction(**attrs)

# In order to resolve dependencies, we build a mapping of all files and
# symlinks delivered by installed packages. To reduce the size of that working
# set we apply some pre-filters to discard files which can never be a
# dependency target, either intrinsically (like a man page or image file), or
# because we don't implement a dependency parser for it (e.g. perl).

# Path prefixes to discard; these are matched with str.startswith(). Each
# entry ends with '/' so that only paths beneath the directory match. Without
# the separator a sibling such as 'usr/share/manifest/...' would be discarded
# too, and a dependency on it could then never be resolved.
skip_prefix = (
    'usr/share/man/',
    'opt/ooce/share/man/',
)

# Path suffixes to discard; these are matched with str.endswith().
skip_suffix = (
    # data and plain-text documents
    '.json', '.toml', '.txt', '.html',
    # package manifests
    '.mf', '.p5m',
    # images and rendered documents
    '.png', '.jpg', '.gif',
    '.pdf',
    # perl (no dependency parser is implemented for it)
    '.pl', '.pm',
    # source code in various languages
    '.h', '.c',
    '.rs', '.go', '.js', '.d', '.lua', '.zig', '.rb', '.m4',
    # archives
    '.gz', '.zip',
    '.cmake',
    '.rst',
    '.mo',
    '.vim',
    '.elc',
    '.hpp',
    # files typically shipped by TeX/LaTeX distributions (sources, styles,
    # fonts and font metrics)
    '.tex', '.eps', '.ltx', '.def', '.md', '.ins', '.otf', '.tikz', '.dtx',
    '.afm', '.fd', '.enc', '.sty', '.pfb', '.htf', '.vf', '.tfm',
)

def add_fmri_path_mapping(files_dict, links_dict, pfmri, mfst,
distro_vars=None, use_template=False):
"""Add mappings from path names to FMRIs and variants.
Expand All @@ -1639,13 +1671,22 @@ def add_fmri_path_mapping(files_dict, links_dict, pfmri, mfst,
dictionaries with VariantCombinationTemplates instead of
VariantCombinations."""

def filter(action):
    """Return True if the action should be left out of the path
    mapping, i.e. its 'path' attribute starts with one of the
    skip_prefix entries or ends with one of the skip_suffix entries;
    return False otherwise."""
    candidate = action.attrs['path']
    return (candidate.startswith(skip_prefix) or
        candidate.endswith(skip_suffix))

assert not distro_vars or not use_template
if not use_template:
pvariants = mfst.get_all_variants()
if distro_vars:
pvariants.merge_unknown(distro_vars)

for f in mfst.gen_actions_by_type("file"):
if filter(f): continue
vc = f.get_variant_template()
if not use_template:
vc.merge_unknown(pvariants)
Expand All @@ -1655,6 +1696,7 @@ def add_fmri_path_mapping(files_dict, links_dict, pfmri, mfst,
(pfmri, vc))
for f in itertools.chain(mfst.gen_actions_by_type("hardlink"),
mfst.gen_actions_by_type("link")):
if filter(f): continue
vc = f.get_variant_template()
if not use_template:
vc.merge_unknown(pvariants)
Expand Down Expand Up @@ -1784,6 +1826,7 @@ def __merge_actvct_with_pkgvct(act_vct, pkg_vct):
]
files.installed[pth] = new_val
del tmp_files

# Populate the link dictionary using the installed packages'
# information.
for pth, l in six.iteritems(tmp_links):
Expand Down
10 changes: 5 additions & 5 deletions src/tests/api/t_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2024,7 +2024,7 @@ def _check_all_res(res):
if len(ms) != 1:
raise RuntimeError("Didn't get expected types of "
"missing files:\n{0}".format(ms))
self.assertEqual(list(ms.keys())[0], "empty file")
self.assertEqual(list(ms.keys())[0], portable.EMPTYFILE)
self.assertTrue(len(d_map) == 0)

# This should find the binary file first and thus produce
Expand Down Expand Up @@ -2263,7 +2263,7 @@ def test_broken_manifest(self):

# as it happens, file(1) isn't good at spotting broken
# XML documents, it only sniffs the header - so this file
# gets reported as an 'XML document' despite it being invalid
# gets reported as an XMLDOC despite it being invalid
# XML.
t_path = self.make_manifest(self.broken_smf_manf)
self.make_smf_test_files()
Expand All @@ -2275,14 +2275,14 @@ def test_broken_manifest(self):
self.assertEqual(len(ms), 1, "No unknown files reported during "
"analysis")

if "XML document" not in ms:
if portable.XMLDOC not in ms:
self.assertTrue(False, "Broken SMF manifest file not"
" declared")

broken_path = os.path.join(self.proto_dir, self.paths["broken"])
self.assertEqual(ms["XML document"], broken_path,
self.assertEqual(ms[portable.XMLDOC], broken_path,
"Did not detect broken SMF manifest file: {0} != {1}".format(
broken_path, ms["XML document"]))
broken_path, ms[portable.XMLDOC]))

# We should still be able to resolve the other dependencies
# though and it's important to check that the one broken SMF
Expand Down
2 changes: 1 addition & 1 deletion src/tests/cli/t_pkgdep.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def make_full_res_manf_1(self, proto_area, reason, include_os=False):
depend {pfx}.file=syslog {pfx}.path=var/log fmri={dummy_fmri} type=require {pfx}.reason=usr/foo {pfx}.type=hardlink
""".format(pfx=base.Dependency.DEPEND_DEBUG_PREFIX, dummy_fmri=base.Dependency.DUMMY_FMRI)

res_manf_2_missing = "unknown"
res_manf_2_missing = str(portable.UNKNOWN)

resolve_error = """\
{manf_path} has unresolved dependency '
Expand Down

0 comments on commit 5b0d787

Please sign in to comment.