# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""MetadataRecord extractor for built debian packages"""
import logging
from dataclasses import dataclass
from typing import (
    Iterator,
    List,
    Optional,
)
from uuid import UUID

from debian.deb822 import (
    BuildInfo,
    Changes,
    Dsc,
)
from debian.debfile import DebFile
from datalad.api import get
from datalad_metalad.extractors.base import (
    DatasetMetadataExtractor,
    DataOutputCategory,
    ExtractorResult,
)


@dataclass
class DebianPackageVersion:
    """One source-package version found in a package dataset."""

    # Package name: the part of the '.dsc' file name before the first '_'.
    name: str
    # Complete version exactly as it appears in the '.dsc' file name.
    version_string: str
    # Part of ``version_string`` before the last '-' (all of it if no '-').
    upstream_version: str
    # Part of ``version_string`` after the last '-'; "0" if there is no '-'.
    debian_revision: str
    # Platform strings taken from the names of the matching '.deb' files.
    platforms: List[str]


lgr = logging.getLogger('datalad.debian.extractors.package')


class DebianPackageElementNames:
    """Read-only mapping from element kind to the expected file name.

    The source elements ``"dsc"``, ``"orig"`` and ``"debian"`` are always
    present.  If ``platform`` is given, the binary elements ``"deb"``,
    ``"dbgsym"``, ``"changes"`` and ``"buildinfo"`` are added.

    Parameters
    ----------
    name:
        Package name.
    upstream_version:
        Upstream part of the version.
    debian_revision:
        Debian revision.  The value ``"0"`` means "no revision": the file
        names then carry only the upstream version.
        NOTE(review): a package whose real revision is literally "0" cannot
        be told apart from one without revision -- confirm this is intended.
    platform:
        Platform string of a binary build, or ``None`` for source-only names.
    """

    def __init__(self,
                 name: str,
                 upstream_version: str,
                 debian_revision: str,
                 platform: Optional[str] = None):

        full_version = (
            f"{upstream_version}-{debian_revision}"
            if debian_revision != "0"
            else upstream_version
        )
        upstream_name = f"{name}_{upstream_version}"
        debian_name = f"{name}_{full_version}"

        self._names = {
            "dsc": f"{debian_name}.dsc",
            "orig": f"{upstream_name}.orig.tar.gz",
            "debian": f"{debian_name}.debian.tar.gz",
        }

        if platform:
            binary_suffix = f"_{full_version}_{platform}"
            platform_name = f"{name}{binary_suffix}"
            # The dbgsym name is assembled directly.  Deriving it with
            # ``str.replace(f"{name}_", ...)`` would rewrite *every*
            # occurrence, e.g. also the one created by a revision that ends
            # with the package name ("r1_1.0-r1_amd64").
            dbgsym_name = f"{name}-dbgsym{binary_suffix}"
            self._names.update({
                "deb": f"{platform_name}.deb",
                "dbgsym": f"{dbgsym_name}.deb",
                "changes": f"{platform_name}.changes",
                "buildinfo": f"{platform_name}.buildinfo",
            })

    def __getitem__(self, item):
        return self._names[item]

    def __len__(self):
        return len(self._names)

    def __iter__(self):
        # Without this, iteration falls back to integer indexing through
        # __getitem__ and fails with KeyError(0).
        return iter(self._names)

    def __contains__(self, item):
        return item in self._names

    def keys(self):
        return self._names.keys()

    def items(self):
        return self._names.items()

    def values(self):
        return self._names.values()


class DebianPackageExtractor(DatasetMetadataExtractor):
    """Dataset-level metadata extractor for a built debian package dataset.

    Registered in ``setup.cfg`` under the ``datalad.metadata.extractors``
    entry-point group with the name ``debian_package_dataset``.
    """

    def get_id(self) -> UUID:
        """Return the fixed UUID that identifies this extractor."""
        return UUID("d6203798-fa94-49d8-b71e-54d5fa63a7b4")

    def get_version(self) -> str:
        """Return the version of this extractor."""
        return "0.0.1"

    def get_data_output_category(self) -> DataOutputCategory:
        """Return ``IMMEDIATE``: results are passed in the extractor result."""
        return DataOutputCategory.IMMEDIATE

    def get_required_content(self) -> bool:
        """Fetch the content of the whole dataset and return ``True``."""
        get(path=".", dataset=self.dataset)
        return True

    def extract(self, _=None) -> ExtractorResult:
        """Collect metadata for every package version in the dataset.

        On success ``immediate_data`` has the form::

            {
                "name": <package name>,
                "upstream_version": {
                    <upstream version>: {
                        "orig": <placeholder string>,
                        "debian_revisions": {
                            <debian revision>: {
                                "binaries": {<platform>: <binary info>},
                                "debian": <debian tarball name>,
                                "maintainer": ...,
                                "homepage": ... or None,
                                "standards-version": ...,
                            }
                        }
                    }
                }
            }

        If no '.dsc' file is found, an unsuccessful result with status
        ``"error"`` and message ``"no debian package"`` is returned.

        Raises
        ------
        AssertionError
            If the dataset layout is inconsistent (see ``_find_versions``)
            or a platform is reported twice for the same revision.
        """
        dataset_dir = self.dataset.pathobj

        package_name = None
        upstream_versions = {}

        for version_info in self._find_versions():
            package_name = version_info.name
            source_names = DebianPackageElementNames(
                version_info.name,
                version_info.upstream_version,
                version_info.debian_revision
            )

            # python-debian parses the stream in the constructor, so the
            # file can be closed as soon as the Dsc object exists.
            dsc_path = dataset_dir / source_names["dsc"]
            with dsc_path.open("rt", encoding="utf-8") as dsc_file:
                package_dsc = Dsc(dsc_file)

            binary_infos = {
                platform: self._get_binary_info(
                    dataset_dir,
                    DebianPackageElementNames(
                        version_info.name,
                        version_info.upstream_version,
                        version_info.debian_revision,
                        platform))
                for platform in version_info.platforms
            }

            version_dict = upstream_versions.setdefault(
                version_info.upstream_version,
                {
                    "orig": f"(NOT IMPLEMENTED): {source_names['orig']}",
                    "debian_revisions": {}
                }
            )
            revision_dict = version_dict["debian_revisions"].setdefault(
                version_info.debian_revision,
                {"binaries": {}}
            )

            revision_dict["debian"] = source_names['debian']
            revision_dict["maintainer"] = package_dsc['maintainer']
            revision_dict["homepage"] = package_dsc.get('homepage', None)
            revision_dict["standards-version"] = package_dsc["standards-version"]

            for platform, binary_info in binary_infos.items():
                # Raised explicitly (instead of ``assert``) so the check is
                # not stripped under ``python -O``; the exception type is
                # unchanged.
                if platform in revision_dict["binaries"]:
                    raise AssertionError(
                        f"platform {platform} already recorded")
                revision_dict["binaries"][platform] = binary_info

        if package_name is None:
            return ExtractorResult(
                extractor_version=self.get_version(),
                extraction_parameter=self.parameter or {},
                extraction_success=False,
                datalad_result_dict={
                    "type": "dataset",
                    "status": "error",
                    "message": "no debian package"
                }
            )

        return ExtractorResult(
            extractor_version=self.get_version(),
            extraction_parameter=self.parameter or {},
            extraction_success=True,
            datalad_result_dict={
                "type": "dataset",
                "status": "ok",
            },
            immediate_data={
                "name": package_name,
                "upstream_version": upstream_versions,
            }
        )

    def _get_binary_info(self, path, names):
        """Describe the binary build elements of one platform.

        Parameters
        ----------
        path:
            Directory (path object) that contains the build elements.
        names:
            Mapping with the keys ``"deb"``, ``"dbgsym"``, ``"buildinfo"``
            and ``"changes"``, e.g. a ``DebianPackageElementNames`` created
            with a platform.

        Returns
        -------
        dict
            Keys ``"deb"``, ``"build_info"`` and ``"changes"``, plus
            ``"dbgsym"`` if the debug-symbol package exists.  Every value is
            the string ``"<file name>: <parsed object>"``.
        """
        def parse(deb822_class, file_name):
            # Close the file right after parsing instead of leaking it.
            with (path / file_name).open("rt", encoding="utf-8") as stream:
                return deb822_class(stream)

        debug_symbols_path = path / names['dbgsym']
        # NOTE(review): the DebFile objects are only interpolated via their
        # default string form -- looks like a placeholder; confirm which
        # fields should end up in the metadata.
        binary_info = {
            "deb": f"{names['deb']}: {DebFile(path / names['deb'])}",
            "build_info": f"{names['buildinfo']}: {parse(BuildInfo, names['buildinfo'])}",
            "changes": f"{names['changes']}: {parse(Changes, names['changes'])}",
        }
        if debug_symbols_path.exists():
            binary_info["dbgsym"] = f"{names['dbgsym']}: {DebFile(debug_symbols_path)}"
        return binary_info

    def _find_versions(self) -> Iterator[DebianPackageVersion]:
        """Find all versions and platforms

        Find all versions, i.e. upstream_version and debian_revision. Version
        detection is based on '.dsc'-files. Platforms are determined based on
        '.deb' files.

        Files are visited in sorted order so that the result does not depend
        on the directory listing order.

        Raises
        ------
        AssertionError
            If a '.dsc' entry is not a file, if more than one package name is
            found, or if directory name, file name and the 'Source' and
            'Version' fields of the '.dsc' file do not agree.  The checks are
            raised explicitly so they also run under ``python -O``.
        """
        package_dir = self.dataset.pathobj

        all_names = set()
        for path in sorted(package_dir.glob("*.dsc")):

            if not path.is_file():
                raise AssertionError(f"Not a file: {path}")

            # '<name>_<version>.dsc' -> name, version
            name, _, version_info = path.stem.partition("_")
            all_names.add(name)
            if len(all_names) != 1:
                raise AssertionError(
                    f"More than one packet name found: {str(all_names)}")

            if "-" in version_info:
                upstream_version, debian_revision = version_info.rsplit("-", 1)
            else:
                upstream_version, debian_revision = version_info, "0"

            with path.open("rt", encoding="utf-8") as dsc_file:
                dsc = Dsc(dsc_file)

            if dsc["source"] != package_dir.name:
                raise AssertionError(
                    f"directory name ({package_dir.name}) does not match source ({dsc['source']}) in .dsc-file.")
            if dsc["source"] != name:
                raise AssertionError(
                    f"file name ({name}) does not match source ({dsc['source']}) in .dsc-file.")
            # Debian file names never contain the epoch, so strip a leading
            # '<epoch>:' from the Version field before comparing.
            dsc_version = dsc["version"].split(":", 1)[-1]
            if dsc_version != version_info:
                raise AssertionError(
                    f"version in file name ({version_info}) does not match version ({dsc['version']}) in .dsc-file.")

            prefix = f"{name}_{version_info}_"
            platforms = sorted(
                platform_path.stem[len(prefix):]
                for platform_path
                in package_dir.glob(f"{prefix}*.deb"))

            yield DebianPackageVersion(
                name,
                version_info,
                upstream_version,
                debian_revision,
                platforms)
Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the diff --git a/setup.py b/setup.py index 00cdf7e..ed01d4c 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import sys from setuptools import setup import versioneer @@ -12,8 +11,8 @@ cmdclass.update(build_manpage=BuildManPage) if __name__ == '__main__': - setup(name='datalad_debian', - version=versioneer.get_version(), - cmdclass=cmdclass, + setup( + name='datalad_debian', + version=versioneer.get_version(), + cmdclass=cmdclass, ) -