generated from aboutcode-org/skeleton
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add pipeline to advertise scancode.io scans
Signed-off-by: Keshav Priyadarshi <[email protected]>
- Loading branch information
1 parent
12c30d3
commit 6d01e09
Showing
5 changed files
with
295 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# FederatedCode is a trademark of nexB Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
# See https://github.com/nexB/federatedcode for support or download. | ||
# See https://aboutcode.org for more information about AboutCode.org OSS projects. | ||
# | ||
|
||
import logging | ||
from datetime import datetime | ||
from datetime import timezone | ||
from timeit import default_timer as timer | ||
|
||
from aboutcode.pipeline import BasePipeline | ||
from aboutcode.pipeline import humanize_time | ||
|
||
module_logger = logging.getLogger(__name__) | ||
|
||
|
||
class classproperty:
    """
    Read-only descriptor that computes an attribute from the owning class.

    The wrapped callable receives the class (never the instance), whether
    the attribute is looked up on the class itself or on one of its instances.
    """

    def __init__(self, fget):
        # Callable invoked with the owner class on every attribute access.
        self.fget = fget

    def __get__(self, instance, owner):
        # `instance` is deliberately ignored: the value depends on the class only.
        getter = self.fget
        return getter(owner)
|
||
|
||
class FederatedCodePipeline(BasePipeline):
    """
    Base class for FederatedCode pipelines.

    Concrete pipelines must assign a unique, non-empty ``pipeline_id`` class
    attribute. Reading ``pipeline_id`` on a class that does not define one
    raises ``NotImplementedError``.
    """

    def on_failure(self):
        """
        Tasks to run in the event that pipeline execution fails.
        Implement cleanup or other tasks that need to be performed
        on pipeline failure, such as:
            - Removing cloned repositories.
            - Deleting downloaded archives.
        """
        pass

    def execute(self):
        """
        Execute each step in the order defined on this pipeline class.

        Return a ``(exitcode, output)`` tuple: ``(0, "")`` when every step ran
        without raising, or ``(1, <output_from_exception(...)>)`` as soon as a
        step raises. On failure, ``on_failure()`` is run before returning and
        the remaining steps are not executed.
        """
        self.log(f"Pipeline [{self.pipeline_name}] starting")

        steps = self.pipeline_class.get_steps(groups=self.selected_groups)
        steps_count = len(steps)
        pipeline_start_time = timer()

        for current_index, step in enumerate(steps, start=1):
            step_name = step.__name__

            # Honour an explicit step selection, when one was provided.
            if self.selected_steps and step_name not in self.selected_steps:
                self.log(f"Step [{step_name}] skipped")
                continue

            self.set_current_step(f"{current_index}/{steps_count} {step_name}")
            self.log(f"Step [{step_name}] starting")
            step_start_time = timer()

            try:
                step(self)
            except Exception as exception:
                # Pipeline boundary: any step error is converted into a
                # non-zero exit code after the failure hook has run.
                self.log("Pipeline failed")
                on_failure_start_time = timer()
                self.log("Running [on_failure] tasks")
                self.on_failure()
                on_failure_run_time = timer() - on_failure_start_time
                self.log(f"Completed [on_failure] tasks in {humanize_time(on_failure_run_time)}")

                return 1, self.output_from_exception(exception)

            step_run_time = timer() - step_start_time
            self.log(f"Step [{step_name}] completed in {humanize_time(step_run_time)}")

        self.set_current_step("")  # Reset the `current_step` field on completion
        pipeline_run_time = timer() - pipeline_start_time
        self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}")

        return 0, ""

    def log(self, message, level=logging.INFO):
        """Log the given `message` to the current module logger and execution_log."""
        now_local = datetime.now(timezone.utc).astimezone()
        # `%f` yields microseconds; dropping the last three digits keeps milliseconds.
        timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        message = f"{timestamp} {message}"
        module_logger.log(level, message)
        self.append_to_log(message)

    @classproperty
    def pipeline_id(cls):
        """
        Raise NotImplementedError: no unique pipeline_id has been defined.

        A subclass that assigns ``pipeline_id = "..."`` shadows this
        descriptor, so this code only runs for classes that never set one.
        It must not read ``cls.pipeline_id`` itself: that lookup would
        re-enter this descriptor and recurse until ``RecursionError``.
        """
        raise NotImplementedError("pipeline_id is not defined or is empty")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# FederatedCode is a trademark of nexB Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
# See https://github.com/nexB/federatedcode for support or download. | ||
# See https://aboutcode.org for more information about AboutCode.org OSS projects. | ||
# | ||
|
||
from pathlib import Path | ||
from traceback import format_exc as traceback_format_exc | ||
|
||
from aboutcode.pipeline import LoopProgress | ||
|
||
from fedcode.models import Package | ||
from fedcode.models import Repository | ||
from fedcode.pipelines import FederatedCodePipeline | ||
from fedcode.pipes import utils | ||
|
||
|
||
class SyncScanCodeScans(FederatedCodePipeline):
    """Sync Package scans from FederatedCode git repositories."""

    pipeline_id = "sync_scancode_scans"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they are run."""
        ordered_steps = (
            cls.get_git_repos,
            cls.sync_scan_repositories,
        )
        return ordered_steps

    def get_git_repos(self):
        """Keep the queryset of all Repository objects on the pipeline instance."""
        self.git_repos = Repository.objects.all()

    def sync_scan_repositories(self):
        """Pull each repository from its origin remote and sync its package scans."""
        total_repos = self.git_repos.count()
        self.log(f"Syncing package scans from {total_repos:,d} repositories")

        synced_total = 0
        tracker = LoopProgress(total_iterations=total_repos, logger=self.log)
        repo_stream = self.git_repos.iterator(chunk_size=2000)
        for entry in tracker.iter(repo_stream):
            # NOTE(review): `entry` is already a Repository instance, yet it is
            # passed as the `url` lookup value - confirm this is intended.
            repository, _created = Repository.objects.get_or_create(url=entry)
            origin = repository.git_repo_obj.remotes.origin
            origin.pull()
            newly_synced = sync_scancodeio_scan(
                repository=repository,
                logger=self.log,
            )
            synced_total += newly_synced

        self.log(f"Successfully synced {synced_total:,d} package scans")
|
||
|
||
def sync_scancodeio_scan(repository, logger):
    """
    Sync the package scans of ``repository`` and return how many were synced.

    When ``repository.last_imported_commit`` is set, only the changes between
    that commit and the current HEAD are synced; otherwise every scan found
    in the working tree is synced. In both cases the HEAD commit hash is then
    stored as ``last_imported_commit`` and the repository is saved.

    ``logger`` is a callable accepting a single message string.
    """
    repo = repository.git_repo_obj
    # HEAD already resolves to a commit object; no need to look it up by hash.
    latest_commit = repo.head.commit
    latest_commit_hash = latest_commit.hexsha

    if last_commit_hash := repository.last_imported_commit:
        last_imported_commit = repo.commit(last_commit_hash)
        diffs = last_imported_commit.diff(latest_commit)
        # Hand over the unfiltered diffs: sync_scan_from_diff() selects the
        # scan files itself, looking at both the old and the new path.
        # Pre-filtering on the old path alone would drop entries whose new
        # path is a scan file.
        scan_count = sync_scan_from_diff(diffs=diffs, repository=repository, logger=logger)
    else:
        scan_count = sync_all_scan(repository=repository, logger=logger)

    repository.last_imported_commit = latest_commit_hash
    repository.save()

    return scan_count
|
||
|
||
def sync_scan_from_diff(diffs, repository, logger):
    """
    Sync the package scans touched by ``diffs`` and return how many were synced.

    ``diffs`` is an iterable of diff entries exposing ``a_path``, ``b_path``
    and ``change_type``. Only entries whose old or new path ends with
    ``scancodeio.json`` are considered:
        - added ("A"), modified ("M") and renamed ("R") scans create a note
          for the new path,
        - deleted ("D") scans delete the note for the old path,
        - any other change type is logged and skipped.

    ``logger`` is a callable accepting a single message string.
    """
    scan_file_name = "scancodeio.json"
    scans = [
        item
        for item in diffs
        # Either side of a diff entry may carry no path; treat that as no match.
        if (item.a_path or "").endswith(scan_file_name)
        or (item.b_path or "").endswith(scan_file_name)
    ]
    scan_count = len(scans)

    logger(f"Syncing {scan_count:,d} package scan from {repository.url}")
    progress = LoopProgress(total_iterations=scan_count, logger=logger)
    synced_count = 0
    for scan in progress.iter(scans):
        change_type = scan.change_type
        if change_type in ("A", "M", "R"):
            scan_path = scan.b_path
            action = utils.create_note
        elif change_type == "D":
            scan_path = scan.a_path
            action = utils.delete_note
        else:
            # Without this branch `scan_path` and `action` would be unbound, or
            # left over from the previous entry and applied to the wrong scan.
            logger(f"Skipping package scan with unsupported change type: {change_type!r}")
            continue

        purl = utils.package_metadata_path_to_purl(path=Path(scan_path), version=False)
        package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
        note = utils.get_scan_note(path=Path(scan_path))
        action(pkg=package, note_dict=note)
        synced_count += 1
    return synced_count
|
||
|
||
def sync_all_scan(repository, logger):
    """
    Create a note for every ``scancodeio.json`` found in the working tree of
    ``repository`` and return the number of scans processed.

    ``logger`` is a callable accepting a single message string.
    """
    repo = repository.git_repo_obj
    root = Path(repo.working_dir)

    # Walk the working tree once and reuse the result for both the count and
    # the iteration, so the two always agree.
    scans = list(root.rglob("scancodeio.json"))
    scan_count = len(scans)
    logger(f"Syncing {scan_count:,d} package scan from {repo.remotes.origin.url}")

    progress = LoopProgress(total_iterations=scan_count, logger=logger)
    for scan in progress.iter(scans):
        # The package identity is encoded in the path relative to the repo root.
        relative_path = scan.relative_to(root)
        purl = utils.package_metadata_path_to_purl(relative_path, version=False)
        package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
        note = utils.get_scan_note(path=relative_path)
        utils.create_note(pkg=package, note_dict=note)
    return scan_count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# FederatedCode is a trademark of nexB Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
# See https://github.com/nexB/federatedcode for support or download. | ||
# See https://aboutcode.org for more information about AboutCode.org OSS projects. | ||
# | ||
|
||
import saneyaml | ||
from packageurl import PackageURL | ||
|
||
from fedcode.activitypub import Activity | ||
from fedcode.activitypub import CreateActivity | ||
from fedcode.activitypub import DeleteActivity | ||
from fedcode.models import Note | ||
|
||
|
||
def create_note(pkg, note_dict):
    """
    Attach a note built from ``note_dict`` to ``pkg`` and federate its creation.

    The note content is the YAML dump of ``note_dict``; an existing note with
    the same ``acct`` and content is reused. A Create activity is then sent
    to the inboxes of the package followers.
    """
    account = pkg.acct
    content = saneyaml.dump(note_dict)
    note, _created = Note.objects.get_or_create(acct=account, content=content)
    pkg.notes.add(note)

    activity = CreateActivity(actor=pkg.to_ap, object=note.to_ap)
    inboxes = pkg.followers_inboxes
    payload = activity.to_ap()
    Activity.federate(targets=inboxes, body=payload, key_id=pkg.key_id)
|
||
|
||
def delete_note(pkg, note_dict):
    """
    Delete the ``pkg`` note matching ``note_dict`` and federate its deletion.

    The note is looked up by ``acct`` and by the YAML dump of ``note_dict``;
    the lookup raises if no such note exists. A Delete activity is then sent
    to the inboxes of the package followers.
    """
    note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(note_dict))
    # Capture the ActivityPub representation while the note still exists.
    note_ap = note.to_ap

    # Detach the note from the package before deleting it, so that the
    # removal operates on a note that is still saved.
    pkg.notes.remove(note)
    note.delete()

    deleted_activity = DeleteActivity(actor=pkg.to_ap, object=note_ap)
    Activity.federate(
        targets=pkg.followers_inboxes,
        # Call to_ap(), as create_note() does, to send the serialized activity
        # rather than the bound method itself.
        body=deleted_activity.to_ap(),
        key_id=pkg.key_id,
    )
|
||
|
||
def package_metadata_path_to_purl(path, version=True):
    """
    Return PURL from relative metadata path.

    ``path`` is a path object whose last component is the metadata file name
    and whose second to last component is the package version; everything
    before that forms the PURL type, namespace and name. The version is only
    included in the PURL when ``version`` is true.

    Raise ValueError when ``path`` has fewer than four components.
    """
    parts = path.parts
    if len(parts) < 4:
        raise ValueError("Not a valid package metadata path.")

    purl = f"pkg:{'/'.join(parts[:-2])}"
    if version:
        purl = f"{purl}@{parts[-2]}"
    return PackageURL.from_string(purl=purl)
|
||
|
||
def get_scan_note(path):
    """
    Return the note mapping advertising the package scan stored at ``path``.

    The mapping holds the versioned package PURL derived from ``path`` and a
    single scan entry for the ``scancodeio.json`` file.
    """
    package_url = package_metadata_path_to_purl(path=path)

    # TODO: Use tool-alias.yml to get tool for corresponding tool
    # for scan https://github.com/aboutcode-org/federatedcode/issues/24
    scancode_scan = {
        "tool": "pkg:pypi/scancode-toolkit",
        "file_name": "scancodeio.json",
    }
    return {
        "purl": str(package_url),
        "scans": [scancode_scan],
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
aboutcode.pipeline==0.1.0 | ||
aboutcode-toolkit==10.1.0 | ||
alabaster==0.7.13 | ||
anyio==4.1.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters