Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: implement package metadata extractor #112

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ install:
build_script:
  - python -m pip install -r requirements-devel.txt
  - python -m pip install .
  # TODO: where do dataclasses come from on python3.8? Our packages should not require them!
  # --yes: pip asks for confirmation before uninstalling, and a CI job has
  # nobody to answer the prompt.
  - python -m pip uninstall --yes dataclasses


#after_build:
Expand Down
229 changes: 229 additions & 0 deletions datalad_debian/metadata/extractors/debian_package_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Metadata extractor for built Debian packages"""
import logging
from dataclasses import dataclass
from typing import (
List,
Optional,
)
from uuid import UUID

from debian.deb822 import (
BuildInfo,
Changes,
Dsc,
)
from debian.debfile import DebFile
from datalad.api import get
from datalad_metalad.extractors.base import (
DatasetMetadataExtractor,
DataOutputCategory,
ExtractorResult,
)


@dataclass
class DebianPackageVersion:
    """One source version of a Debian package and the platforms built for it.

    Instances are yielded by ``DebianPackageExtractor._find_versions``, which
    derives every field from the ``.dsc`` and ``.deb`` file names found in the
    dataset directory.
    """
    # Package name: the part of the ``.dsc`` file name before the first ``_``.
    name: str
    # Complete version as spelled in the ``.dsc`` file name.
    version_string: str
    # Part of ``version_string`` before its last ``-`` (all of it if no ``-``).
    upstream_version: str
    # Part of ``version_string`` after its last ``-``; "0" if there is no ``-``.
    debian_revision: str
    # Platform suffixes taken from the names of the matching ``.deb`` files.
    platforms: List[str]


# Module-level logger for this extractor.
lgr = logging.getLogger('datalad.debian.extractors.package')


class DebianPackageElementNames:
    """File names of the elements that belong to one Debian package version.

    The object behaves like a small read-only mapping from element kind to
    file name. The source elements ``"dsc"``, ``"orig"`` and ``"debian"`` are
    always present. The binary elements ``"deb"``, ``"dbgsym"``, ``"changes"``
    and ``"buildinfo"`` are only present when a ``platform`` is given.

    Parameters
    ----------
    name:
        Package name.
    upstream_version:
        Upstream part of the version.
    debian_revision:
        Debian revision. The value ``"0"`` means "the version has no revision
        part"; no ``-<revision>`` suffix is put into the file names then.
    platform:
        Platform suffix of the binary files (e.g. ``"amd64"``). If ``None``
        or empty, only the source element names are created.
    """

    def __init__(self,
                 name: str,
                 upstream_version: str,
                 debian_revision: str,
                 platform: Optional[str] = None):

        version = (
            f"{upstream_version}-{debian_revision}"
            if debian_revision != "0"
            else upstream_version
        )
        upstream_name = f"{name}_{upstream_version}"
        debian_name = f"{name}_{version}"

        self._names = {
            "dsc": f"{debian_name}.dsc",
            "orig": f"{upstream_name}.orig.tar.gz",
            "debian": f"{debian_name}.debian.tar.gz",
        }

        if platform:
            platform_name = f"{debian_name}_{platform}"
            # Build the dbgsym name from its parts. Rewriting ``platform_name``
            # with ``str.replace`` would also hit a version that happens to
            # end in the package name (e.g. ``rc_1.0rc_amd64``).
            dbgsym_name = f"{name}-dbgsym_{version}_{platform}"
            self._names.update({
                "deb": f"{platform_name}.deb",
                "dbgsym": f"{dbgsym_name}.deb",
                "changes": f"{platform_name}.changes",
                "buildinfo": f"{platform_name}.buildinfo",
            })

    def __getitem__(self, item):
        return self._names[item]

    def __len__(self):
        return len(self._names)

    def __iter__(self):
        # Without this, iteration falls back to integer indexing through
        # ``__getitem__`` and fails with ``KeyError(0)``.
        return iter(self._names)

    def __contains__(self, item):
        return item in self._names

    def keys(self):
        return self._names.keys()

    def items(self):
        return self._names.items()

    def values(self):
        return self._names.values()


class DebianPackageExtractor(DatasetMetadataExtractor):
    """Dataset-level extractor for a directory of built Debian package files.

    The dataset directory is scanned for ``.dsc`` files; each one identifies
    a source version of the package. For every version the matching ``.deb``
    files determine the platforms, and the ``.buildinfo``/``.changes`` files
    of each platform are read. The result is a nested dictionary keyed by
    upstream version, Debian revision and platform.
    """

    def get_id(self) -> UUID:
        """Return the fixed UUID that identifies this extractor."""
        return UUID("d6203798-fa94-49d8-b71e-54d5fa63a7b4")

    def get_version(self) -> str:
        """Return the version string of this extractor."""
        return "0.0.1"

    def get_data_output_category(self) -> DataOutputCategory:
        """Return ``DataOutputCategory.IMMEDIATE``.

        The extracted metadata is returned in ``immediate_data`` of the
        ``ExtractorResult``.
        """
        return DataOutputCategory.IMMEDIATE

    def get_required_content(self) -> bool:
        """Call datalad ``get`` on path ``"."`` of the dataset, return True.

        NOTE(review): presumably ``True`` tells the caller that the required
        content is available -- confirm against the base class contract.
        """
        get(path=".", dataset=self.dataset)
        return True

    def extract(self, _=None) -> ExtractorResult:
        """Collect metadata for all package versions found in the dataset.

        Returns
        -------
        ExtractorResult
            With ``extraction_success=True`` and ``immediate_data`` of the
            form ``{"name": ..., "upstream_version": {...}}`` if at least one
            ``.dsc`` file was found; with ``extraction_success=False`` and
            status ``"error"`` otherwise.

        Raises
        ------
        AssertionError
            If the files in the dataset are inconsistent (see
            ``_find_versions``) or a platform is recorded twice for the same
            revision.
        """
        dataset_dir = self.dataset.pathobj

        package_name = None
        upstream_versions = {}

        for version_info in self._find_versions():
            package_name = version_info.name

            source_names = DebianPackageElementNames(
                version_info.name,
                version_info.upstream_version,
                version_info.debian_revision,
            )
            package_dsc = self._read_control_file(
                Dsc, dataset_dir / source_names["dsc"])

            # Read all binary files before touching the result, so a broken
            # file does not leave a half-filled entry behind.
            binary_infos = {
                platform: self._get_binary_info(
                    dataset_dir,
                    DebianPackageElementNames(
                        version_info.name,
                        version_info.upstream_version,
                        version_info.debian_revision,
                        platform))
                for platform in version_info.platforms
            }

            version_dict = upstream_versions.setdefault(
                version_info.upstream_version,
                {
                    "orig": f"(NOT IMPLEMENTED): {source_names['orig']}",
                    "debian_revisions": {},
                })
            revision_dict = version_dict["debian_revisions"].setdefault(
                version_info.debian_revision,
                {"binaries": {}})

            revision_dict["debian"] = source_names['debian']
            revision_dict["maintainer"] = package_dsc['maintainer']
            revision_dict["homepage"] = package_dsc.get('homepage', None)
            revision_dict["standards-version"] = package_dsc["standards-version"]

            for platform, binary_info in binary_infos.items():
                self._require(
                    platform not in revision_dict["binaries"],
                    f"Platform {platform} recorded twice for version "
                    f"{version_info.version_string}")
                revision_dict["binaries"][platform] = binary_info

        if package_name is None:
            # No .dsc file at all: there is nothing to describe.
            return ExtractorResult(
                extractor_version=self.get_version(),
                extraction_parameter=self.parameter or {},
                extraction_success=False,
                datalad_result_dict={
                    "type": "dataset",
                    "status": "error",
                    "message": "no debian package"
                }
            )

        return ExtractorResult(
            extractor_version=self.get_version(),
            extraction_parameter=self.parameter or {},
            extraction_success=True,
            datalad_result_dict={
                "type": "dataset",
                "status": "ok",
            },
            immediate_data={
                "name": package_name,
                "upstream_version": upstream_versions,
            }
        )

    @staticmethod
    def _require(condition, message):
        """Raise ``AssertionError(message)`` unless ``condition`` is true.

        Used in place of ``assert`` statements so the consistency checks are
        not removed when Python runs with ``-O``. The exception type is the
        one an ``assert`` statement raises.
        """
        if not condition:
            raise AssertionError(message)

    @staticmethod
    def _read_control_file(parser, path):
        """Parse the control file ``path`` with ``parser`` and close the file.

        ``parser`` is one of the deb822 classes (``Dsc``, ``BuildInfo``,
        ``Changes``). The file is decoded as UTF-8 instead of the locale's
        encoding, so the result does not depend on the platform.
        """
        with open(path, "rt", encoding="utf-8") as control_file:
            return parser(control_file)

    def _get_binary_info(self, path, names):
        """Describe the binary files of one platform.

        Parameters
        ----------
        path:
            Directory that contains the files.
        names:
            Mapping with the keys ``"deb"``, ``"dbgsym"``, ``"buildinfo"``
            and ``"changes"``, e.g. a ``DebianPackageElementNames`` that was
            created with a platform.

        Returns
        -------
        dict
            Keys ``"deb"``, ``"build_info"`` and ``"changes"``, plus
            ``"dbgsym"`` if the debug symbol package exists. Each value is
            the file name followed by the string form of the parsed object.
        """
        debug_symbols_path = path / names['dbgsym']

        deb = DebFile(path / names['deb'])
        build_info = self._read_control_file(
            BuildInfo, path / names['buildinfo'])
        changes = self._read_control_file(Changes, path / names['changes'])

        info = {
            "deb": f"{names['deb']}: {deb}",
            "build_info": f"{names['buildinfo']}: {build_info}",
            "changes": f"{names['changes']}: {changes}",
        }
        # The debug symbol package is optional.
        if debug_symbols_path.exists():
            info["dbgsym"] = f"{names['dbgsym']}: {DebFile(debug_symbols_path)}"
        return info

    def _find_versions(self):
        """Find all versions and platforms

        Find all versions, i.e. upstream_version and debian_revision. Version
        detection is based on '.dsc'-files. Platforms are determined based on
        '.deb' files.

        Yields one ``DebianPackageVersion`` per ``.dsc`` file, in sorted file
        name order.

        Raises ``AssertionError`` if a ``.dsc`` entry is not a file, if the
        ``.dsc`` files belong to more than one package, or if the source name
        or version stored in a ``.dsc`` file does not match the directory
        name or the file name.

        NOTE: a version without a ``-`` gets the debian_revision "0", and
        ``DebianPackageElementNames`` turns "0" back into "no revision part".
        A package whose real revision is "0" (``<name>_1.0-0.dsc``) is
        therefore looked up under file names without the ``-0``.
        """
        package_dir = self.dataset.pathobj

        all_names = set()
        # sorted(): glob order depends on the file system.
        for path in sorted(package_dir.glob("*.dsc")):

            self._require(path.is_file(), f"Not a file: {path}")
            name = path.name.split('_')[0]
            all_names.add(name)
            self._require(
                len(all_names) == 1,
                f"More than one package name found: {all_names}")

            # File name layout: <name>_<version>.dsc
            version_info = path.name[len(name) + 1:-len(".dsc")]
            if "-" in version_info:
                upstream_version, debian_revision = version_info.rsplit("-", 1)
            else:
                upstream_version, debian_revision = version_info, "0"

            dsc = self._read_control_file(Dsc, path)
            # File names never carry the epoch ("1:" in "1:2.0-1"), the
            # Version field does; compare without it.
            dsc_version = dsc["version"].split(":", 1)[-1]
            self._require(
                dsc["source"] == package_dir.name,
                f"directory name ({package_dir.name}) does not match source "
                f"({dsc['source']}) in .dsc-file.")
            self._require(
                dsc["source"] == name,
                f"file name ({name}) does not match source "
                f"({dsc['source']}) in .dsc-file.")
            self._require(
                dsc_version == version_info,
                f"version in file name ({version_info}) does not match "
                f"version ({dsc['version']}) in .dsc-file.")

            # File name layout: <name>_<version>_<platform>.deb
            deb_prefix = f"{name}_{version_info}_"
            platforms = sorted(
                deb_path.name[len(deb_prefix):-len(".deb")]
                for deb_path in package_dir.glob(f"{deb_prefix}*.deb"))

            yield DebianPackageVersion(
                name,
                version_info,
                upstream_version,
                debian_revision,
                platforms)
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ python_requires = >= 3.7
install_requires =
datalad >= 0.17
datalad-container
datalad-metalad
python-debian
packages = find:
include_package_data = True
Expand All @@ -42,6 +43,9 @@ datalad.extensions =
# valid datalad interface specification (see demo in this extensions)
datalad_debian = datalad_debian:command_suite

datalad.metadata.extractors =
debian_package_dataset = datalad_debian.metadata.extractors.debian_package_dataset:DebianPackageExtractor

[versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
Expand Down
9 changes: 4 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python

import sys
from setuptools import setup
import versioneer

Expand All @@ -12,8 +11,8 @@
cmdclass.update(build_manpage=BuildManPage)

if __name__ == '__main__':
setup(name='datalad_debian',
version=versioneer.get_version(),
cmdclass=cmdclass,
setup(
name='datalad_debian',
version=versioneer.get_version(),
cmdclass=cmdclass,
)