Skip to content

Commit

Permalink
Clean up (removes unused code, updates dependencies, checks for sta…
Browse files Browse the repository at this point in the history
…tus code 200, adjusts database hostname)
  • Loading branch information
lauraschauer committed Jul 18, 2024
1 parent 0f49bc8 commit 566387b
Show file tree
Hide file tree
Showing 8 changed files with 318 additions and 95 deletions.
205 changes: 192 additions & 13 deletions prospector/backenddb/postgres.py

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions prospector/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
import os
import signal
import sys
from typing import Any, Dict

from dotenv import load_dotenv

from llm.llm_service import LLMService
from util.http import ping_backend
Expand All @@ -16,8 +13,6 @@

import core.report as report # noqa: E402
from cli.console import ConsoleWriter, MessageStatus # noqa: E402
from core.prospector import TIME_LIMIT_AFTER # noqa: E402
from core.prospector import TIME_LIMIT_BEFORE # noqa: E402
from core.prospector import prospector  # noqa: E402

# Load logger before doing anything else
Expand Down Expand Up @@ -58,7 +53,10 @@ def main(argv): # noqa: C901

# Whether to use the LLMService
if config.llm_service:
if not config.repository and not config.llm_service.use_llm_repository_url:
if (
not config.repository
and not config.llm_service.use_llm_repository_url
):
logger.error(
"Repository URL was neither specified nor allowed to obtain with LLM support. One must be set."
)
Expand All @@ -80,7 +78,9 @@ def main(argv): # noqa: C901
return

config.pub_date = (
config.pub_date + "T00:00:00Z" if config.pub_date is not None else ""
config.pub_date + "T00:00:00Z"
if config.pub_date is not None
else ""
)

logger.debug("Using the following configuration:")
Expand Down
2 changes: 1 addition & 1 deletion prospector/config-sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ backend: http://localhost:8000
database:
user: postgres
password: example
host: db
host: localhost # Database address; when in containerised version, use 'db', otherwise 'localhost'
port: 5432
dbname: postgres

Expand Down
78 changes: 60 additions & 18 deletions prospector/core/prospector.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ def prospector( # noqa: C901
repository = Git(repository_url, git_cache)

with ConsoleWriter("Git repository cloning") as console:
logger.debug(f"Downloading repository {repository.url} in {repository.path}")
logger.debug(
f"Downloading repository {repository.url} in {repository.path}"
)
repository.clone()

tags = repository.get_tags()
Expand All @@ -129,7 +131,9 @@ def prospector( # noqa: C901

if len(fixing_commit) > 0:
candidates = get_commits_no_tags(repository, fixing_commit)
if len(candidates) > 0 and any([c for c in candidates if c in fixing_commit]):
if len(candidates) > 0 and any(
[c for c in candidates if c in fixing_commit]
):
console.print("Fixing commit found in the advisory references\n")
advisory_record.has_fixing_commit = True

Expand Down Expand Up @@ -160,9 +164,13 @@ def prospector( # noqa: C901
candidates = filter(candidates)

if len(candidates) > limit_candidates:
logger.error(f"Number of candidates exceeds {limit_candidates}, aborting.")
logger.error(
f"Number of candidates exceeds {limit_candidates}, aborting."
)

ConsoleWriter.print(f"Candidates limitlimit exceeded: {len(candidates)}.")
ConsoleWriter.print(
f"Candidates limit exceeded: {len(candidates)}."
)
return None, len(candidates)

with ExecutionTimer(
Expand All @@ -171,10 +179,12 @@ def prospector( # noqa: C901
with ConsoleWriter("\nProcessing commits") as writer:
try:
if use_backend != USE_BACKEND_NEVER:
missing, preprocessed_commits = retrieve_preprocessed_commits(
repository_url,
backend_address,
candidates,
missing, preprocessed_commits = (
retrieve_preprocessed_commits(
repository_url,
backend_address,
candidates,
)
)
except requests.exceptions.ConnectionError:
logger.error(
Expand Down Expand Up @@ -225,7 +235,11 @@ def prospector( # noqa: C901

payload = [c.to_dict() for c in preprocessed_commits]

if len(payload) > 0 and use_backend != USE_BACKEND_NEVER and len(missing) > 0:
if (
len(payload) > 0
and use_backend != USE_BACKEND_NEVER
and len(missing) > 0
):
save_preprocessed_commits(backend_address, payload)
else:
logger.warning("Preprocessed commits are not being sent to backend")
Expand All @@ -242,17 +256,23 @@ def prospector( # noqa: C901
return ranked_candidates, advisory_record


def preprocess_commits(commits: List[RawCommit], timer: ExecutionTimer) -> List[Commit]:
def preprocess_commits(
commits: List[RawCommit], timer: ExecutionTimer
) -> List[Commit]:
preprocessed_commits: List[Commit] = list()
with Counter(timer.collection.sub_collection("commit preprocessing")) as counter:
with Counter(
timer.collection.sub_collection("commit preprocessing")
) as counter:
counter.initialize("preprocessed commits", unit="commit")
for raw_commit in tqdm(
commits,
desc="Processing commits",
unit=" commit",
):
counter.increment("preprocessed commits")
counter_val = counter.__dict__["collection"]["preprocessed commits"][0]
counter_val = counter.__dict__["collection"][
"preprocessed commits"
][0]
if counter_val % 100 == 0 and counter_val * 2 > time.time():
pass
preprocessed_commits.append(make_from_raw_commit(raw_commit))
Expand Down Expand Up @@ -287,7 +307,9 @@ def evaluate_commits(
"""
with ExecutionTimer(core_statistics.sub_collection("candidates analysis")):
with ConsoleWriter("Candidate analysis") as _:
ranked_commits = apply_rules(commits, advisory, enabled_rules=enabled_rules)
ranked_commits = apply_rules(
commits, advisory, enabled_rules=enabled_rules
)

return ranked_commits

Expand All @@ -305,7 +327,9 @@ def remove_twins(commits: List[Commit]) -> List[Commit]:
return output


def tag_and_aggregate_commits(commits: List[Commit], next_tag: str) -> List[Commit]:
def tag_and_aggregate_commits(
commits: List[Commit], next_tag: str
) -> List[Commit]:
return commits
if next_tag is None or next_tag == "":
return commits
Expand Down Expand Up @@ -347,7 +371,9 @@ def retrieve_preprocessed_commits(
break # return list(candidates.values()), list()
responses.append(r.json())

retrieved_commits = [commit for response in responses for commit in response]
retrieved_commits = [
commit for response in responses for commit in response
]

logger.info(f"Found {len(retrieved_commits)} preprocessed commits")

Expand All @@ -368,7 +394,9 @@ def retrieve_preprocessed_commits(


def save_preprocessed_commits(backend_address, payload):
with ExecutionTimer(core_statistics.sub_collection(name="save commits to backend")):
with ExecutionTimer(
core_statistics.sub_collection(name="save commits to backend")
):
with ConsoleWriter("Saving processed commits to backend") as writer:
logger.debug("Sending processing commits to backend...")
try:
Expand All @@ -377,6 +405,7 @@ def save_preprocessed_commits(backend_address, payload):
json=payload,
headers={"Content-type": "application/json"},
)
r.raise_for_status()  # Raise HTTPError for 4xx/5xx responses
logger.debug(
f"Saving to backend completed (status code: {r.status_code})"
)
Expand All @@ -391,6 +420,14 @@ def save_preprocessed_commits(backend_address, payload):
"Could not save preprocessed commits to backend",
status=MessageStatus.WARNING,
)
except requests.exceptions.HTTPError as e:
logger.error(
f"Backend returned an error status: {e}."
)
writer.print(
"Could not save preprocessed commits to backend",
status=MessageStatus.WARNING,
)


# tries to be dynamic
Expand Down Expand Up @@ -428,7 +465,11 @@ def get_commits_from_tags(
with ConsoleWriter("Candidate commit retrieval") as writer:
since = None
until = None
if advisory_record.published_timestamp and not next_tag and not prev_tag:
if (
advisory_record.published_timestamp
and not next_tag
and not prev_tag
):
since = advisory_record.reserved_timestamp - time_limit_before
until = advisory_record.reserved_timestamp + time_limit_after

Expand All @@ -442,7 +483,8 @@ def get_commits_from_tags(

if len(candidates) == 0:
candidates = repository.create_commits(
since=advisory_record.reserved_timestamp - time_limit_before,
since=advisory_record.reserved_timestamp
- time_limit_before,
until=advisory_record.reserved_timestamp + time_limit_after,
next_tag=None,
prev_tag=None,
Expand Down
2 changes: 1 addition & 1 deletion prospector/docker/worker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ COPY docker/worker/etc_supervisor_confd_rqworker.conf.j2 /etc/supervisor.d/rqwor
#VOLUME ["/pythonimports"]
#ENV PYTHONPATH "${PYTHONPATH}:/pythonimports"

VOLUME ["data_sources/nvd/reports"]
VOLUME [ "/data_sources/reports" ]

RUN chmod +x /usr/local/bin/start_rq_worker.sh
#CMD tail -f /dev/null
Expand Down
3 changes: 2 additions & 1 deletion prospector/requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

aiohttp
aiofiles
beautifulsoup4
colorama
datasketch
Expand Down
Loading

0 comments on commit 566387b

Please sign in to comment.