From 8bb0e96e3276dc8abe140096d31d02e0d06a5597 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 12:27:32 -0500 Subject: [PATCH 01/12] Added type annotations --- .github/workflows/python-package.yml | 3 + setup.cfg | 6 +- src/reposcanner/__init__.py | 0 src/reposcanner/analyses.py | 17 +- src/reposcanner/contrib.py | 227 +++++++++++--------------- src/reposcanner/data.py | 154 +++++++++--------- src/reposcanner/dummy.py | 47 +++--- src/reposcanner/git.py | 164 +++++++++++-------- src/reposcanner/manager.py | 228 ++++++++++++++++----------- src/reposcanner/provenance.py | 89 +++++++---- src/reposcanner/reposcanner.py | 14 +- src/reposcanner/requests.py | 121 +++++--------- src/reposcanner/response.py | 50 ++++-- src/reposcanner/routines.py | 70 ++++---- src/reposcanner/util.py | 8 + tests/test_baseRoutines.py | 39 ++++- tests/test_data.py | 5 +- tests/test_manager.py | 2 +- tests/test_requests.py | 20 --- 19 files changed, 668 insertions(+), 596 deletions(-) create mode 100644 src/reposcanner/__init__.py create mode 100644 src/reposcanner/util.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8473dc1..499bbe3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,6 +37,9 @@ jobs: # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Check with Mypy + run: | + MYPYPATH=src python -m mypy src tests - name: Test with pytest env: DUMMY_GITHUBAPI_TOKEN: ${{ secrets.DUMMY_WORKFLOW_GITHUB_TOKEN }} diff --git a/setup.cfg b/setup.cfg index c45f92f..0ed080c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,9 +57,13 @@ install_requires = pydot tqdm numpy + windows-curses;platform_system=='Windows' pytest-mock pytest-cov - windows-curses;platform_system=='Windows' + mypy + types-tqdm + types-PyYAML + pandas-stubs [options.packages.find] where = src diff --git a/src/reposcanner/__init__.py b/src/reposcanner/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/reposcanner/analyses.py b/src/reposcanner/analyses.py index b78d460..dda0e72 100644 --- a/src/reposcanner/analyses.py +++ b/src/reposcanner/analyses.py @@ -1,11 +1,14 @@ from abc import ABC, abstractmethod +from reposcanner.requests import BaseRequestModel +from reposcanner.response import ResponseModel +from typing import Dict, Optional, Any, Type class DataAnalysis(ABC): """The abstract base class for all data analyses. Methods cover the execution of analyses, rendering, and exporting of data.""" - def canHandleRequest(self, request): + def canHandleRequest(self, request: BaseRequestModel) -> bool: """ Returns True if the routine is capable of handling the request (i.e. the RequestModel is of the type that the analysis expects), and False otherwise. @@ -16,14 +19,14 @@ def canHandleRequest(self, request): return False @abstractmethod - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. """ pass @abstractmethod - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: """ Contains the code for processing data generated by mining routines and/or from external databases. 
@@ -35,7 +38,7 @@ def execute(self, request): """ pass - def run(self, request): + def run(self, request: BaseRequestModel) -> ResponseModel: """ Encodes the workflow of a DataAnalysis object. The client only needs to run this method in order to get results. @@ -43,7 +46,7 @@ def run(self, request): response = self.execute(request) return response - def hasConfigurationParameters(self): + def hasConfigurationParameters(self) -> bool: """ Checks whether the analysis object was passed configuration parameters, whether valid or not. Routines are not required to do anything with parameters @@ -55,7 +58,7 @@ def hasConfigurationParameters(self): except BaseException: return False - def getConfigurationParameters(self): + def getConfigurationParameters(self) -> Optional[Dict[str, Any]]: """ Returns the configuration parameters assigned to the analysis. """ @@ -65,7 +68,7 @@ def getConfigurationParameters(self): except BaseException: return None - def setConfigurationParameters(self, configParameters): + def setConfigurationParameters(self, configParameters: Dict[str, Any]) -> None: """ Assigns configuration parameters to a newly created analysis. """ diff --git a/src/reposcanner/contrib.py b/src/reposcanner/contrib.py index 5417ad1..723c70b 100644 --- a/src/reposcanner/contrib.py +++ b/src/reposcanner/contrib.py @@ -1,10 +1,15 @@ +# This future import allows us to reference a class in type annotations before it is declared. 
+from __future__ import annotations from reposcanner.routines import OfflineRepositoryRoutine, OnlineRepositoryRoutine from reposcanner.analyses import DataAnalysis -from reposcanner.requests import OfflineRoutineRequest, OnlineRoutineRequest, AnalysisRequestModel -from reposcanner.response import ResponseFactory +from reposcanner.requests import BaseRequestModel, OfflineRoutineRequest, OnlineRoutineRequest, AnalysisRequestModel +from reposcanner.response import ResponseFactory, ResponseModel from reposcanner.provenance import ReposcannerRunInformant -from reposcanner.data import DataEntityFactory -import pygit2 +from reposcanner.data import DataEntityFactory, ReposcannerDataEntity, AnnotatedCSVData +from reposcanner.util import replaceNoneWithEmptyString +from reposcanner.git import Session +from typing import Dict, Type, List +import pygit2 # type: ignore from pathlib import Path import time @@ -21,8 +26,7 @@ class CommitInfoMiningRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class CommitInfoMiningRoutine(OfflineRepositoryRoutine): @@ -31,11 +35,8 @@ class CommitInfoMiningRoutine(OfflineRepositoryRoutine): authorship information, the commit message, and which files were interacted with. """ - def getRequestType(self): - return CommitInfoMiningRoutineRequest - - def offlineImplementation(self, request, session): - + def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, CommitInfoMiningRoutineRequest) factory = DataEntityFactory() fout = Path(request.getOutputDirectory()) \ / "{repoOwner}_{repoName}_CommitInfoMining.csv".format( @@ -80,7 +81,7 @@ def offlineImplementation(self, request, session): ["list"] + ["str"]) - def _getFilesTouched(commit): + def _getFilesTouched(commit) -> List[str]: # TODO: Go back and check this method. 
Are we correctly interpreting the semantics of # the deltas we receive from pygit2? changes = [] @@ -103,13 +104,13 @@ def _getFilesTouched(commit): changes.append(delta.new_file.path) return changes - def _cleanCommitMessage(s): + def _cleanCommitMessage(s: str) -> str: # This replaces all sequences of whitespace characters # with a single space, eliminating tabs, newlines, etc. # Also get rid of commas, as commas are our default delimiter. return re.sub('\\s+', ' ', s).replace(',', ' ') - def _getStats(commit): + def _getStats(commit) -> Dict[str, int]: changes = {'ins': 0, 'del': 0, 'files': 0} if len(commit.parents) == 0: diff = commit.tree.diff_to_tree() @@ -126,16 +127,8 @@ def _getStats(commit): changes['files'] += diff.stats.files_changed return changes - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - for commit in session.walk(session.head.target, pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_TOPOLOGICAL): - extractedCommitData = {} - # The person who originally made the change and when they made it, a # pygit2.Signature. 
author = commit.author @@ -180,18 +173,18 @@ def _replaceNoneWithEmptyString(value): filesTouched = _getFilesTouched(commit) - output.addRecord([_replaceNoneWithEmptyString(commitHash), - _replaceNoneWithEmptyString(commitTime), - _replaceNoneWithEmptyString(authorEmail), - _replaceNoneWithEmptyString(authorName), - _replaceNoneWithEmptyString(authorTime), - _replaceNoneWithEmptyString(committerEmail), - _replaceNoneWithEmptyString(committerName), - _replaceNoneWithEmptyString(committerTime), + output.addRecord([replaceNoneWithEmptyString(commitHash), + replaceNoneWithEmptyString(commitTime), + replaceNoneWithEmptyString(authorEmail), + replaceNoneWithEmptyString(authorName), + replaceNoneWithEmptyString(authorTime), + replaceNoneWithEmptyString(committerEmail), + replaceNoneWithEmptyString(committerName), + replaceNoneWithEmptyString(committerTime), ";".join(coAuthors), changes['ins'], changes['del'], changes['files'], ';'.join(filesTouched), - _cleanCommitMessage(_replaceNoneWithEmptyString(commitMessage)) + _cleanCommitMessage(replaceNoneWithEmptyString(commitMessage)) ]) output.writeToFile() @@ -200,21 +193,7 @@ def _replaceNoneWithEmptyString(value): class OnlineCommitAuthorshipRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): @@ -223,16 +202,8 @@ class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): with GitHub/Gitlab/Bitbucket account information. 
""" - def getRequestType(self): - return OnlineCommitAuthorshipRoutineRequest - - def githubImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - + def githubImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, OnlineCommitAuthorshipRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -260,22 +231,17 @@ def _replaceNoneWithEmptyString(value): else: committerLogin = None - output.addRecord([_replaceNoneWithEmptyString(commitHash), - _replaceNoneWithEmptyString(authorLogin), - _replaceNoneWithEmptyString(committerLogin)]) + output.addRecord([replaceNoneWithEmptyString(commitHash), + replaceNoneWithEmptyString(authorLogin), + replaceNoneWithEmptyString(committerLogin)]) output.writeToFile() responseFactory = ResponseFactory() return responseFactory.createSuccessResponse( message="OnlineCommitAuthorshipRoutine completed!", attachments=output) - def gitlabImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - + def gitlabImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, OnlineCommitAuthorshipRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -315,23 +281,23 @@ def _replaceNoneWithEmptyString(value): else: committerLogin = None - output.addRecord([_replaceNoneWithEmptyString(commitHash), - _replaceNoneWithEmptyString(authorLogin), - _replaceNoneWithEmptyString(committerLogin)]) + output.addRecord([replaceNoneWithEmptyString(commitHash), + replaceNoneWithEmptyString(authorLogin), + replaceNoneWithEmptyString(committerLogin)]) output.writeToFile() responseFactory = ResponseFactory() 
return responseFactory.createSuccessResponse( message="OnlineCommitAuthorshipRoutine completed!", attachments=output) - def bitbucketImplementation(self, request, session): + def bitbucketImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: # TODO: Implement Commit Author Identification Routine implementation for # Gitlab. - pass + raise NotImplementedError class GambitCommitAuthorshipInferenceAnalysisRequest(AnalysisRequestModel): - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: try: creator = entity.getCreator() if creator == "CommitInfoMiningRoutine": @@ -361,31 +327,29 @@ class GambitCommitAuthorshipInferenceAnalysis(DataAnalysis): """ - def __init__(self): + def __init__(self) -> None: """ We check for the presence of the (optional) gambit package. This analysis cannot run unless gambit-disambig is installed. """ super(GambitCommitAuthorshipInferenceAnalysis, self).__init__() try: - import gambit + import gambit # type: ignore except ImportError: - self.gambitIsAvailable = False self.gambitImportRef = None else: - self.gambitIsAvailable = True self.gambitImportRef = gambit - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. 
""" return GambitCommitAuthorshipInferenceAnalysisRequest - def execute(self, request): - + def execute(self, request: BaseRequestModel) -> ResponseModel: + assert isinstance(request, GambitCommitAuthorshipInferenceAnalysisRequest) responseFactory = ResponseFactory() - if not self.gambitIsAvailable: + if not self.gambitImportRef is not None: return responseFactory.createFailureResponse(message="Gambit is not \ installed, halting execution.") @@ -416,6 +380,7 @@ def execute(self, request): contributorNamesAndEmails = set() for commitLogEntity in commitLogEntities: + assert isinstance(commitLogEntity, AnnotatedCSVData) commitLogFrame = commitLogEntity.getDataFrame() # TODO: Add support for co-authors listed in commit messages. We now collect this data # when running CommitInfoMiningRoutine, but we aren't yet checking it here. @@ -452,7 +417,7 @@ def execute(self, request): class VerifiedCommitAuthorshipAnalysisRequest(AnalysisRequestModel): - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: try: creator = entity.getCreator() if creator == "OnlineCommitAuthorshipRoutine" or creator == "CommitInfoMiningRoutine": @@ -465,22 +430,22 @@ def criteriaFunction(self, entity): class VerifiedCommitAuthorshipAnalysis(DataAnalysis): - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. """ return VerifiedCommitAuthorshipAnalysisRequest - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: + assert isinstance(request, VerifiedCommitAuthorshipAnalysisRequest) # TODO: Set up Verified Commit Authorship Analysis. 
- pass + raise NotImplementedError ######################################## class OfflineCommitCountsRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class OfflineCommitCountsRoutine(OfflineRepositoryRoutine): @@ -489,10 +454,11 @@ class OfflineCommitCountsRoutine(OfflineRepositoryRoutine): emails of contributors. """ - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return OfflineCommitCountsRoutineRequest - def offlineImplementation(self, request, session): + def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, OfflineCommitCountsRoutineRequest) numberOfCommitsByContributor = {} for commit in session.walk(session.head.target, pygit2.GIT_SORT_TOPOLOGICAL): @@ -527,21 +493,7 @@ def offlineImplementation(self, request, session): class ContributorAccountListRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class ContributorAccountListRoutine(OnlineRepositoryRoutine): @@ -551,16 +503,11 @@ class ContributorAccountListRoutine(OnlineRepositoryRoutine): # TODO: ContributorAccountListRoutine lacks a # bitbucketImplementation(self,request,session) method. 
- def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return ContributorAccountListRoutineRequest - def _replaceNoneWithEmptyString(self, value): - if value is None: - return "" - else: - return value - - def githubImplementation(self, request, session): + def githubImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, ContributorAccountListRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_contributorAccounts.csv".format( @@ -578,9 +525,9 @@ def githubImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.login), - self._replaceNoneWithEmptyString(contributor.name), - ';'.join([self._replaceNoneWithEmptyString(contributor.email)]) + replaceNoneWithEmptyString(contributor.login), + replaceNoneWithEmptyString(contributor.name), + ';'.join([replaceNoneWithEmptyString(contributor.email)]) ]) @@ -589,8 +536,10 @@ def githubImplementation(self, request, session): return responseFactory.createSuccessResponse( message="Completed!", attachments=output) - def gitlabImplementation(self, request, session): + def gitlabImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, ContributorAccountListRoutineRequest) contributors = [contributor for contributor in session.users.list()] + factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_contributorAccounts.csv".format( outputDirectory=request.getOutputDirectory(), @@ -607,8 +556,8 @@ def gitlabImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.username), - 
self._replaceNoneWithEmptyString(contributor.name), + replaceNoneWithEmptyString(contributor.username), + replaceNoneWithEmptyString(contributor.name), ';'.join(contributor.emails.list()) ]) @@ -619,22 +568,22 @@ def gitlabImplementation(self, request, session): class FileInteractionRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class FileInteractionRoutine(OfflineRepositoryRoutine): - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return FileInteractionRoutineRequest - def offlineImplementation(self, request, session): + def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, FileInteractionRoutineRequest) # TODO: Implement offline, commit-based file interaction routine (in the # vein of Vasilescu et al.). - pass + raise NotImplementedError class ContributorFileInteractionAnalysisRequest(AnalysisRequestModel): - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: """ Here we assume that the entity is, in fact, a ReposcannerDataEntity. Because we haven't yet @@ -655,19 +604,19 @@ def criteriaFunction(self, entity): class ContributorFileInteractionAnalysis(DataAnalysis): - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. 
""" return ContributorFileInteractionAnalysisRequest - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: # TODO: Set up the Contributor File Interaction Analysis - pass + raise NotImplementedError() class TeamSizeAndDistributionAnalysisRequest(AnalysisRequestModel): - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: """ Here we assume that the entity is, in fact, a ReposcannerDataEntity. Because we haven't yet @@ -697,13 +646,14 @@ class TeamSizeAndDistributionAnalysis(DataAnalysis): the data, and a CSV file containing the data used to generate those graphs. """ - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. """ return TeamSizeAndDistributionAnalysisRequest - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: + assert isinstance(request, TeamSizeAndDistributionAnalysisRequest) responseFactory = ResponseFactory() data = request.getData() @@ -713,22 +663,24 @@ def execute(self, request): return responseFactory.createFailureResponse( message="Received no ContributorAccountListRoutine data.") - loginData = next( - (entity for entity in data if "github_login.csv" in entity.getFilePath()), + loginDataEnt = next( + (entity for entity in data if "github_login.csv" in entity.getFilePath().name), None) - if loginData is None: + if loginDataEnt is None: return responseFactory.createFailureResponse( message="Failed to find github_login.csv from reposcanner-data.") else: - loginData = loginData.getDataFrame(firstRowContainsHeaders=True) + assert isinstance(loginDataEnt, AnnotatedCSVData) + loginData = loginDataEnt.getDataFrame(firstRowContainsHeaders=True) - memberData = next( - (entity for entity in data if "members.csv" in entity.getFilePath()), None) - if memberData is None: + memberDataEnt = next( + (entity for entity in data if 
"members.csv" in entity.getFilePath().name), None) + if memberDataEnt is None: return responseFactory.createFailureResponse( message="Failed to find members.csv from reposcanner-data.") else: - memberData = memberData.getDataFrame(firstRowContainsHeaders=True) + assert isinstance(memberDataEnt, AnnotatedCSVData) + memberData = memberDataEnt.getDataFrame(firstRowContainsHeaders=True) dataEntityFactory = DataEntityFactory() analysisCSVOutput = dataEntityFactory.createAnnotatedCSVData( @@ -746,6 +698,7 @@ def execute(self, request): analysisCSVOutput.setColumnDatatypes(["str", "int", "int", "int"]) for contributorListEntity in contributorListEntities: + assert isinstance(contributorListEntity, AnnotatedCSVData) contributorListFrame = contributorListEntity.getDataFrame() repositoryURL = contributorListEntity.getURL() diff --git a/src/reposcanner/data.py b/src/reposcanner/data.py index 383294c..f3bf665 100644 --- a/src/reposcanner/data.py +++ b/src/reposcanner/data.py @@ -1,11 +1,16 @@ +# This future import allows us to reference a class in type annotations before it is declared. +from __future__ import annotations from abc import ABC, abstractmethod import csv +import _csv import re import os import hashlib import yaml import pandas as pd +from pathlib import Path import datetime +from typing import List, Iterable, Dict, Callable, Optional, Any, Tuple, Union class DataEntityStore: @@ -20,10 +25,10 @@ class DataEntityStore: class is used by the ReposcannerManager, who intercepts data following the completion of tasks. """ - def __init__(self): - self._storage = [] + def __init__(self) -> None: + self._storage: List[ReposcannerDataEntity] = [] - def __len__(self): + def __len__(self) -> int: """ Length operator for DataEntityStore. @@ -31,7 +36,7 @@ def __len__(self): """ return len(self._storage) - def __contains__(self, entity): + def __contains__(self, entity: ReposcannerDataEntity) -> bool: """ 'in' operator for DataEntityStore. 
@@ -39,7 +44,7 @@ def __contains__(self, entity): """ return entity in self._storage - def read(self): + def read(self) -> Iterable[ReposcannerDataEntity]: """ Provides a generator to iterate over all the data held in the store. Usually callers will @@ -48,7 +53,7 @@ def read(self): for entity in self._storage: yield entity - def insert(self, entity): + def insert(self, entity: ReposcannerDataEntity) -> None: """ Add the ReposcannerDataEntity to the data store. @@ -56,7 +61,7 @@ def insert(self, entity): """ self._storage.append(entity) - def remove(self, entity): + def remove(self, entity: ReposcannerDataEntity) -> None: """ Remove an entity from the data store, if it exists. This method will raise a ValueError if there is no such entity. @@ -65,7 +70,7 @@ def remove(self, entity): """ self._storage.remove(entity) - def getByCriteria(self, criteria): + def getByCriteria(self, criteria: Callable[[ReposcannerDataEntity], bool]) -> List[ReposcannerDataEntity]: """ Return only those data entities that meet the specified criteria. @@ -81,10 +86,10 @@ def f(entity): class DataEntityFactory: - def createAnnotatedCSVData(self, filePath): + def createAnnotatedCSVData(self, filePath: Union[Path, str]) -> AnnotatedCSVData: return AnnotatedCSVData(filePath=filePath) - def createYAMLData(self, filePath): + def createYAMLData(self, filePath: Union[Path, str]) -> YAMLData: return YAMLData(filePath=filePath) @@ -94,30 +99,30 @@ class ReposcannerDataEntity(ABC): by Reposcanner and its mining routines and analyses. """ - def __init__(self, filePath): + def __init__(self, filePath: Union[Path, str]) -> None: """ filepath: The path to the file where the data will be written (or read from). metadataAttributes: metadata associated with the data entity. 
""" - self._metadataAttributes = {} - self._filePath = filePath + self._metadataAttributes: dict[str, Any] = {} + self._filePath = Path(filePath) self.setReposcannerExecutionID(None) self.setDateCreated(None) self.setCreator(None) - def getFilePath(self): + def getFilePath(self) -> Path: return self._filePath - def setMetadataAttribute(self, key, value): + def setMetadataAttribute(self, key: str, value: Any) -> None: self._metadataAttributes[key] = value - def getMetadataAttribute(self, key): + def getMetadataAttribute(self, key: str) -> Any: return self._metadataAttributes[key] - def getAttributeKeys(self): + def getAttributeKeys(self) -> Iterable[str]: return self._metadataAttributes.keys() - def setReposcannerExecutionID(self, executionid): + def setReposcannerExecutionID(self, executionid: Optional[str]) -> None: """ executionid: A string containing an id that uniquely identifies the particular run of the Reposcanner tool that was used to @@ -125,32 +130,32 @@ def setReposcannerExecutionID(self, executionid): """ self.setMetadataAttribute("executionid", executionid) - def getReposcannerExecutionID(self): + def getReposcannerExecutionID(self) -> Optional[str]: return self.getMetadataAttribute("executionid") - def setDateCreated(self, date): + def setDateCreated(self, dt: Optional[datetime.date]) -> None: """ - datetime: A datetime.date object. + dt: A datetime.date object. """ - self.setMetadataAttribute("datecreated", date) + self.setMetadataAttribute("datecreated", dt) - def getDateCreated(self): + def getDateCreated(self) -> Optional[datetime.date]: return self.getMetadataAttribute("datecreated") - def setCreator(self, creator): + def setCreator(self, creator: Optional[str]) -> None: """ creator: A string indicating what routine, analysis, etc. was responsible for creating this data entity. 
""" self.setMetadataAttribute("creator", creator) - def getCreator(self): + def getCreator(self) -> Optional[str]: return self.getMetadataAttribute("creator") - def fileExists(self): + def fileExists(self) -> bool: return os.path.exists(self._filePath) - def getMD5Hash(self): + def getMD5Hash(self) -> str: """ Compute the MD5 checksum for a file for provenance-tracking purposes. """ @@ -162,7 +167,7 @@ def getMD5Hash(self): return hash_md5.hexdigest() @abstractmethod - def validateMetadata(self): + def validateMetadata(self) -> bool: """ Should hold routines that validate that all necessary metadata is provided and/or is accurate @@ -171,7 +176,7 @@ def validateMetadata(self): pass @abstractmethod - def readFromFile(self): + def readFromFile(self) -> None: """ Load the data in the file. Data will be accessible via this object. @@ -179,7 +184,7 @@ def readFromFile(self): pass @abstractmethod - def writeToFile(self): + def writeToFile(self) -> None: """ Write data held by this object to the file. """ @@ -195,24 +200,24 @@ class YAMLData(ReposcannerDataEntity): These files have no metadata associated with them. """ - def __init__(self, filePath): + def __init__(self, filePath: Union[Path, str]) -> None: """ filepath: The path to the file where the data will be written (or read from). data: The data loaded from or written to the output file in dictionary form. 
""" - super().__init__(filePath) - self._data = {} + super().__init__(Path(filePath)) + self._data: Dict[str, Any] = {} - def validateMetadata(self): + def validateMetadata(self) -> bool: return True - def getData(self): + def getData(self) -> Dict[str, Any]: return self._data - def setData(self, data): + def setData(self, data: Dict[str, Any]) -> None: self._data = data - def readFromFile(self): + def readFromFile(self) -> None: if not os.path.exists(self.getFilePath()): raise OSError("Reposcanner couldn't find the YAML file ({path})\ Shutting down as a precaution.".format(path=self.getFilePath())) @@ -229,7 +234,7 @@ def readFromFile(self): the file.".format(path=self.getFilePath())) self._data = contents - def writeToFile(self): + def writeToFile(self) -> None: with open(self.getFilePath(), 'w') as outfile: yaml.dump(self._data, outfile, default_flow_style=False) @@ -243,66 +248,66 @@ class AnnotatedCSVData(ReposcannerDataEntity): (Model for Tabular Data and Metadata on the Web, section 5.4). """ - def __init__(self, filePath): - super().__init__(filePath) - self._records = [] + def __init__(self, filePath: Union[Path, str]) -> None: + super().__init__(Path(filePath)) + self._records: List[List[str]] = [] self.setColumnNames([]) self.setColumnDatatypes([]) self.setProjectID(None) self.setProjectName(None) self.setURL(None) - def setProjectID(self, projectid): + def setProjectID(self, projectid: Optional[str]) -> None: """ projectid: A string containing the idea for the project ID associated with a repository """ self.setMetadataAttribute("projectid", projectid) - def getProjectID(self): + def getProjectID(self) -> Optional[str]: return self.getMetadataAttribute("projectid") - def setProjectName(self, projectname): + def setProjectName(self, projectname: Optional[str]) -> None: """ projectname: A string containing the name of the project associated with a repository. 
""" self.setMetadataAttribute("projectname", projectname) - def getProjectName(self): + def getProjectName(self) -> Optional[str]: return self.getMetadataAttribute("projectname") - def setURL(self, url): + def setURL(self, url: Optional[str]) -> None: """ url: A string containing the URL that points to the repository where the data in this file was mined. """ self.setMetadataAttribute("url", url) - def getURL(self): + def getURL(self) -> Optional[str]: return self.getMetadataAttribute("url") - def getColumnNames(self): + def getColumnNames(self) -> List[str]: return self.getMetadataAttribute("names") - def setColumnNames(self, names): + def setColumnNames(self, names: List[str]) -> None: """ names: A list of strings containing the (in-order) names for each of the columns. """ self.setMetadataAttribute("names", names) - def getColumnDatatypes(self): + def getColumnDatatypes(self) -> List[str]: return self.getMetadataAttribute("datatypes") - def setColumnDatatypes(self, datatypes): + def setColumnDatatypes(self, datatypes: List[str]) -> None: """ datatypes: A list of strings describing the data types for each of the columns. """ return self.setMetadataAttribute("datatypes", datatypes) - def addRecord(self, record): + def addRecord(self, record: List[Any]) -> None: """ record: A list of objects containing the data needed to write out a record. Records are guaranteed to be @@ -310,7 +315,7 @@ def addRecord(self, record): """ self._records.append(record) - def getRawRecords(self): + def getRawRecords(self) -> List[List[Any]]: """ Get a list of lists, each containing the data associated with the record. This method is provided for testing purposes @@ -318,7 +323,7 @@ def getRawRecords(self): """ return self._records - def getDataFrame(self, firstRowContainsHeaders=False): + def getDataFrame(self, firstRowContainsHeaders: bool = False) -> pd.DataFrame: """ Returns file data in the form of a pandas DataFrame. 
@@ -332,7 +337,7 @@ def getDataFrame(self, firstRowContainsHeaders=False): return pd.DataFrame.from_records( self._records[1:], columns=self._records[0]) - def getRecordsAsDicts(self): + def getRecordsAsDicts(self) -> List[Dict[str, Any]]: """ Returns a list of dictionaries, one for each record, that maps the names of columns to their respective data in the files. @@ -346,7 +351,7 @@ def getRecordsAsDicts(self): recordDicts.append(recordDict) return recordDicts - def validateMetadata(self): + def validateMetadata(self) -> bool: hasExecutionID = self.getReposcannerExecutionID() is not None hasCreator = self.getCreator() is not None hasDateCreated = self.getDateCreated() is not None @@ -360,8 +365,8 @@ def validateMetadata(self): and hasProjectID and hasProjectName and hasURL \ and hasColumnNames and hasColumnDatatypes - def readFromFile(self): - def readMetadataFromFile(text): + def readFromFile(self) -> None: + def readMetadataFromFile(text: str) -> Tuple[str, str]: try: # TODO: This may turn out to be a fragile way of parsing the # metadata line if the metadata value has spaces in it. @@ -383,20 +388,21 @@ def readMetadataFromFile(text): if metadataKey == "names" or metadataKey == "datatypes": # TODO: Parse names and datatypes, which are lists delimited # by semicolons. 
- metadataValue = metadataValue.split(sep=';') - self.setMetadataAttribute(metadataKey, metadataValue) - else: - if metadataKey == "datecreated": - metadataValue = datetime.date.fromisoformat( + metadataValueSplit = metadataValue.split(sep=';') + self.setMetadataAttribute(metadataKey, metadataValueSplit) + elif metadataKey == "datecreated": + metadataValueDate = datetime.date.fromisoformat( metadataValue) + self.setMetadataAttribute(metadataKey, metadataValueDate) + else: self.setMetadataAttribute(metadataKey, metadataValue) else: currentlyReadingMetadata = False else: self.addRecord(row) - def writeToFile(self): - def writeMetadataFieldToFile(csvwriter, key, value): + def writeToFile(self) -> None: + def writeMetadataFieldToFile(csvwriter: _csv._writer, key: str, value: str) -> None: csvwriter.writerow(["#{key} {value}".format(key=key, value=value)]) with open(self.getFilePath(), 'w', newline='\n') as f: csvwriter = csv.writer( @@ -415,12 +421,18 @@ def writeMetadataFieldToFile(csvwriter, key, value): names = ";".join(self.getColumnNames()) datatypes = ";".join(self.getColumnDatatypes()) - writeMetadataFieldToFile(csvwriter, "executionid", executionid) - writeMetadataFieldToFile(csvwriter, "creator", creator) - writeMetadataFieldToFile(csvwriter, "datecreated", dateCreated) - writeMetadataFieldToFile(csvwriter, "projectid", projectID) - writeMetadataFieldToFile(csvwriter, "projectname", projectName) - writeMetadataFieldToFile(csvwriter, "url", url) + if executionid is not None: + writeMetadataFieldToFile(csvwriter, "executionid", executionid) + if creator is not None: + writeMetadataFieldToFile(csvwriter, "creator", creator) + if dateCreated is not None: + writeMetadataFieldToFile(csvwriter, "datecreated", dateCreated.isoformat()) + if projectID is not None: + writeMetadataFieldToFile(csvwriter, "projectid", projectID) + if projectName is not None: + writeMetadataFieldToFile(csvwriter, "projectname", projectName) + if url is not None: + 
writeMetadataFieldToFile(csvwriter, "url", url) writeMetadataFieldToFile(csvwriter, "names", names) writeMetadataFieldToFile(csvwriter, "datatypes", datatypes) diff --git a/src/reposcanner/dummy.py b/src/reposcanner/dummy.py index 7a5b5b5..d4499c8 100644 --- a/src/reposcanner/dummy.py +++ b/src/reposcanner/dummy.py @@ -3,16 +3,17 @@ """ from reposcanner.routines import OfflineRepositoryRoutine, OnlineRepositoryRoutine from reposcanner.analyses import DataAnalysis -from reposcanner.requests import OfflineRoutineRequest, OnlineRoutineRequest, AnalysisRequestModel -from reposcanner.response import ResponseFactory +from reposcanner.requests import OfflineRoutineRequest, OnlineRoutineRequest, AnalysisRequestModel, BaseRequestModel +from reposcanner.response import ResponseFactory, ResponseModel from reposcanner.provenance import ReposcannerRunInformant -from reposcanner.data import DataEntityFactory +from reposcanner.data import DataEntityFactory, ReposcannerDataEntity +from reposcanner.git import Session import datetime +from typing import Type class DummyOfflineRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class DummyOfflineRoutine(OfflineRepositoryRoutine): @@ -21,10 +22,11 @@ class DummyOfflineRoutine(OfflineRepositoryRoutine): file that only contains metadata. 
""" - def getRequestType(self): + def getRequestType(self) -> type[BaseRequestModel]: return DummyOfflineRoutineRequest - def offlineImplementation(self, request, session): + def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, DummyOfflineRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_dummyOfflineData.csv".format( @@ -42,21 +44,7 @@ def offlineImplementation(self, request, session): class DummyOnlineRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class DummyOnlineRoutine(OnlineRepositoryRoutine): @@ -65,10 +53,11 @@ class DummyOnlineRoutine(OnlineRepositoryRoutine): repository, then will output a data file that only contains metadata. 
""" - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return DummyOnlineRoutineRequest - def githubImplementation(self, request, session): + def githubImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, DummyOnlineRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_dummyOnlineData.csv".format( @@ -84,7 +73,8 @@ def githubImplementation(self, request, session): return responseFactory.createSuccessResponse( message="DummyOnlineRoutine completed!", attachments=output) - def gitlabImplementation(self, request, session): + def gitlabImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: + assert isinstance(request, DummyOnlineRoutineRequest) factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_dummyOnlineData.csv".format( @@ -102,7 +92,7 @@ def gitlabImplementation(self, request, session): class DummyAnalysisRequest(AnalysisRequestModel): - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: """ The DummyAnalysisRequest attempts to fetch all data from the data store which was created by a DummyOfflineRoutine or a DummyOnlineRoutine. 
@@ -118,9 +108,10 @@ def criteriaFunction(self, entity): class DummyAnalysis(DataAnalysis): - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return DummyAnalysisRequest - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: + assert isinstance(request, DummyAnalysisRequest) responseFactory = ResponseFactory() return responseFactory.createSuccessResponse(message="DummyAnalysis completed!") diff --git a/src/reposcanner/git.py b/src/reposcanner/git.py index 6dce062..5221a4f 100644 --- a/src/reposcanner/git.py +++ b/src/reposcanner/git.py @@ -1,22 +1,30 @@ +# This future import allows us to reference a class in type annotations before it is declared. +from __future__ import annotations from enum import Enum, auto import re import urllib3 from abc import ABC, abstractmethod +from typing import Optional, NewType, Dict, List, Any, cast import github as pygithub import gitlab as pygitlab -import bitbucket as pybitbucket -import pygit2 +import bitbucket as pybitbucket # type: ignore +import pygit2 # type: ignore + + +# Placeholder +Session = Any class GitEntityFactory: def createRepositoryLocation( self, - url, - expectedPlatform=None, - expectedHostType=None, - expectedOwner=None, - expectedRepositoryName=None): + url: str, + expectedPlatform: Optional[RepositoryLocation.VersionControlPlatform] = None, + expectedHostType: Optional[RepositoryLocation.HostType] = None, + expectedOwner: Optional[str] = None, + expectedRepositoryName: Optional[str] = None, + ) -> RepositoryLocation: return RepositoryLocation(url=url, expectedPlatform=expectedPlatform, expectedHostType=expectedHostType, @@ -24,17 +32,21 @@ def createRepositoryLocation( expectedRepositoryName=expectedRepositoryName) def createVersionControlPlatformCredentials( - self, username=None, password=None, token=None): + self, + username: Optional[str] = None, + password: Optional[str] = None, + token: Optional[str] = None, + ) -> 
VersionControlPlatformCredentials: return VersionControlPlatformCredentials( username=username, password=password, token=token) - def createVCSAPISessionCompositeCreator(self): + def createVCSAPISessionCompositeCreator(self) -> VCSAPISessionCompositeCreator: return VCSAPISessionCompositeCreator() - def createGitHubAPISessionCreator(self): + def createGitHubAPISessionCreator(self) -> GitHubAPISessionCreator: return GitHubAPISessionCreator() - def createGitlabAPISessionCreator(self): + def createGitlabAPISessionCreator(self) -> GitlabAPISessionCreator: return GitlabAPISessionCreator() @@ -45,7 +57,7 @@ class VCSAPISessionCreator(ABC): sessions with those services. """ @abstractmethod - def canHandleRepository(self, repositoryLocation): + def canHandleRepository(self, repositoryLocation: RepositoryLocation) -> bool: """ Returns True if the creator knows how to connect to a given repository, and False otherwise. @@ -55,7 +67,11 @@ def canHandleRepository(self, repositoryLocation): pass @abstractmethod - def connect(self, repositoryLocation, credentials): + def connect( + self, + repositoryLocation: RepositoryLocation, + credentials: VersionControlPlatformCredentials, + ) -> Session: """ Attempts to establish a connection to a given repository using the credentials provided. @@ -71,40 +87,49 @@ class VCSAPISessionCompositeCreator(VCSAPISessionCreator): A Composite pattern class for VCSAPISessionCreators. 
""" - def __init__(self): - self._children = [] + def __init__(self) -> None: + self._children: List[VCSAPISessionCreator] = [] - def addChild(self, child): + def addChild(self, child: VCSAPISessionCreator) -> None: self._children.append(child) - def hasChild(self, child): + def hasChild(self, child: VCSAPISessionCreator) -> bool: return child in self._children - def getNumberOfChildren(self): + def getNumberOfChildren(self) -> int: return len(self._children) - def removeChild(self, child): + def removeChild(self, child: VCSAPISessionCreator) -> None: self._children.remove(child) - def canHandleRepository(self, repositoryLocation): + def canHandleRepository(self, repositoryLocation: RepositoryLocation) -> bool: for child in self._children: if child.canHandleRepository(repositoryLocation): return True return False - def connect(self, repositoryLocation, credentials): + def connect( + self, + repositoryLocation: RepositoryLocation, + credentials: VersionControlPlatformCredentials, + ) -> Session: for child in self._children: if child.canHandleRepository(repositoryLocation): return child.connect(repositoryLocation, credentials) + raise RuntimeError("No connector found for {}".format(repositoryLocation)) class GitHubAPISessionCreator(VCSAPISessionCreator): - def canHandleRepository(self, repositoryLocation): + def canHandleRepository(self, repositoryLocation: RepositoryLocation) -> bool: return repositoryLocation.getVersionControlPlatform( ) == RepositoryLocation.VersionControlPlatform.GITHUB - def connect(self, repositoryLocation, credentials): + def connect( + self, + repositoryLocation: RepositoryLocation, + credentials: VersionControlPlatformCredentials, + ) -> Session: # These status codes are caused by random GitHub errors which should # trigger a retry. 
status_forcelist = (500, 502, 504) @@ -129,16 +154,20 @@ def connect(self, repositoryLocation, credentials): "with no username/password or token in it.") repository = session.get_repo(repositoryLocation.getCanonicalName()) - return repository + return cast(Session, repository) class GitlabAPISessionCreator(VCSAPISessionCreator): - def canHandleRepository(self, repositoryLocation): + def canHandleRepository(self, repositoryLocation: RepositoryLocation) -> bool: return repositoryLocation.getVersionControlPlatform( ) == RepositoryLocation.VersionControlPlatform.GITLAB - def connect(self, repositoryLocation, credentials): + def connect( + self, + repositoryLocation: RepositoryLocation, + credentials: VersionControlPlatformCredentials, + ) -> Session: if credentials.hasTokenAvailable(): session = pygitlab.Gitlab( repositoryLocation.getURL(), @@ -154,18 +183,22 @@ def connect(self, repositoryLocation, credentials): "with no username/password or token in it.") repository = session.projects.get(repositoryLocation.getCanonicalName()) - return repository + return cast(Session, repository) class BitbucketAPISessionCreator(VCSAPISessionCreator): - def canHandleRepository(self, repositoryLocation): + def canHandleRepository(self, repositoryLocation: RepositoryLocation) -> bool: return repositoryLocation.getVersionControlPlatform( ) == RepositoryLocation.VersionControlPlatform.BITBUCKET - def connect(self, repositoryLocation, credentials): + def connect( + self, + repositoryLocation: RepositoryLocation, + credentials: VersionControlPlatformCredentials, + ) -> Session: # TODO: Need to figure out how to connect to the Bitbucket API. 
- pass + raise NotImplementedError() class RepositoryLocation: @@ -185,7 +218,7 @@ class HostType(Enum): SELFHOSTED = auto() UNKNOWN = auto() - def _guessPlatformFromURL(self): + def _guessPlatformFromURL(self) -> None: """ If the user does not explicitly state the expected platform (or type of platform) where the repository is located, we attempt to deduce this based on the URL. @@ -217,7 +250,7 @@ def _guessPlatformFromURL(self): self._platform = RepositoryLocation.VersionControlPlatform.UNKNOWN self._hostType = RepositoryLocation.HostType.UNKNOWN - def _guessOwnerAndRepositoryNameFromURL(self): + def _guessOwnerAndRepositoryNameFromURL(self) -> None: """ If the user does not explicitly state the expected owner and repository name, we attempt to deduce these based on the URL. @@ -267,11 +300,12 @@ def _guessOwnerAndRepositoryNameFromURL(self): def __init__( self, - url, - expectedPlatform=None, - expectedHostType=None, - expectedOwner=None, - expectedRepositoryName=None): + url: str, + expectedPlatform: Optional[VersionControlPlatform] = None, + expectedHostType: Optional[HostType] = None, + expectedOwner: Optional[str] = None, + expectedRepositoryName: Optional[str] = None, + ) -> None: """ Parameters: url: The URL to the repository. 
@@ -312,25 +346,27 @@ def __init__( else: self._guessOwnerAndRepositoryNameFromURL() - def getURL(self): + def getURL(self) -> str: return self._url - def getVersionControlPlatform(self): + def getVersionControlPlatform(self) -> VersionControlPlatform: return self._platform - def getVersionControlHostType(self): + def getVersionControlHostType(self) -> HostType: return self._hostType - def getOwner(self): + def getOwner(self) -> Optional[str]: + assert isinstance(self._owner, (str, type(None))) return self._owner - def getRepositoryName(self): + def getRepositoryName(self) -> Optional[str]: + assert isinstance(self._owner, (str, type(None))) return self._repositoryName - def getCanonicalName(self): + def getCanonicalName(self) -> str: return "{owner}/{repo}".format(owner=self._owner, repo=self._repositoryName) - def isRecognizable(self): + def isRecognizable(self) -> bool: return (self._platform != RepositoryLocation.VersionControlPlatform.UNKNOWN and self._hostType != RepositoryLocation.HostType.UNKNOWN and self._owner is not None and @@ -346,7 +382,12 @@ class VersionControlPlatformCredentials: If all of the above, then the username and password should take precedence. """ - def __init__(self, username=None, password=None, token=None): + def __init__( + self, + username: Optional[str] = None, + password: Optional[str] = None, + token: Optional[str] = None, + ) -> None: """ Parameters: username (@input): A string containing the client's handle. @@ -364,19 +405,19 @@ def __init__(self, username=None, password=None, token=None): raise ValueError( "Client did not supply a username/password or token. 
We need one of these in order to proceed!") - def hasUsernameAndPasswordAvailable(self): + def hasUsernameAndPasswordAvailable(self) -> bool: return self._username is not None and self._password is not None - def hasTokenAvailable(self): + def hasTokenAvailable(self) -> bool: return self._token is not None - def getUsername(self): + def getUsername(self) -> Optional[str]: return self._username - def getPassword(self): + def getPassword(self) -> Optional[str]: return self._password - def getToken(self): + def getToken(self) -> Optional[str]: return self._token @@ -388,30 +429,23 @@ class CredentialKeychain: they need from a keychain during construction. """ - def __init__(self, credentialsDictionary): + def __init__(self, credentialsDictionary: Dict[str, Dict[str, str]]) -> None: """ credentialsDictionary: A dictionary containing credentials information. """ - self._credentials = {} + self._credentials: Dict[str, VersionControlPlatformCredentials] = {} if not isinstance(credentialsDictionary, dict): raise TypeError("CredentialKeychain constructor expected to receive \ a dictionary object, but got a {wrongType} instead!".format( wrongType=type(credentialsDictionary))) - def safeAccess(dictionary, key): - """A convenience function for error-free access to a dictionary""" - if key in dictionary: - return dictionary[key] - else: - return None - for entryName in credentialsDictionary: entry = credentialsDictionary[entryName] - url = safeAccess(entry, "url") - username = safeAccess(entry, "username") - password = safeAccess(entry, "password") - token = safeAccess(entry, "token") + url = entry.get("url", None) + username = entry.get("username", None) + password = entry.get("password", None) + token = entry.get("token", None) if url is None: print("Reposcanner: Warning, the entry {entryName} in \ the credentials file is missing a URL. 
Skipping.".format( @@ -428,10 +462,10 @@ def safeAccess(dictionary, key): continue self._credentials[url] = credentialsObject - def __len__(self): + def __len__(self) -> int: return len(self._credentials) - def lookup(self, repositoryLocation): + def lookup(self, repositoryLocation: RepositoryLocation) -> Optional[VersionControlPlatformCredentials]: """ Fetches a key from a keychain based on a RepositoryLocation's URL. If the URL matches more than one entry (e.g. a platform-wide entry diff --git a/src/reposcanner/manager.py b/src/reposcanner/manager.py index ea108b4..21c4bcb 100644 --- a/src/reposcanner/manager.py +++ b/src/reposcanner/manager.py @@ -1,26 +1,38 @@ -from reposcanner.contrib import ContributorAccountListRoutine, OfflineCommitCountsRoutine, GambitCommitAuthorshipInferenceAnalysis -from reposcanner.contrib import CommitInfoMiningRoutine, OnlineCommitAuthorshipRoutine -from reposcanner.dummy import DummyOfflineRoutine, DummyOnlineRoutine, DummyAnalysis +# This future import allows us to reference a class in type annotations before it is declared. 
+from __future__ import annotations from reposcanner.git import CredentialKeychain -from reposcanner.data import DataEntityStore -from reposcanner.response import ResponseFactory -from reposcanner.routines import RepositoryRoutine, ExternalCommandLineToolRoutine -import datetime +from reposcanner.data import DataEntityStore, ReposcannerDataEntity, YAMLData +from reposcanner.response import ResponseFactory, ResponseModel +from reposcanner.routines import RepositoryRoutine, ExternalCommandLineToolRoutine, DataMiningRoutine +from reposcanner.requests import BaseRequestModel, AnalysisRequestModel, ExternalCommandLineToolRoutineRequest, RepositoryRoutineRequestModel, OnlineRoutineRequest +from reposcanner.analyses import DataAnalysis import logging import curses import sys +from typing import Sequence, Union, Optional, List, Iterable, Dict, Any, Tuple, cast, TYPE_CHECKING from tqdm import tqdm # For progress checking in non-GUI mode. from abc import ABC, abstractmethod +if TYPE_CHECKING: + # TYPE_CHECKING is false when actually executing + # This avoids an import cycle. 
+ from reposcanner.provenance import AbstractLabNotebook + class TaskFactory: - def createManagerRepositoryRoutineTask(self, projectID, projectName, url, request): + def createManagerRepositoryRoutineTask( + self, + projectID: str, + projectName: str, + url: str, + request: RepositoryRoutineRequestModel, + ) -> ManagerTask: return ManagerRepositoryRoutineTask(projectID, projectName, url, request) - def createManagerExternalCommandLineToolTask(self, request): + def createManagerExternalCommandLineToolTask(self, request: BaseRequestModel) -> ManagerTask: return ManagerExternalCommandLineToolTask(request) - def createManagerAnalysisTask(self, request): + def createManagerAnalysisTask(self, request: BaseRequestModel) -> ManagerTask: return ManagerAnalysisTask(request) @@ -29,23 +41,29 @@ class ManagerTask(ABC): requests and responses that makes it easier for the frontend to display execution progress.""" - def __init__(self, request): + def __init__(self, request: BaseRequestModel) -> None: self._request = request - self._response = None + self._response: Optional[ResponseModel] = None - def getRequestClassName(self): + def getRequestClassName(self) -> str: return self._request.__class__.__name__ - def getRequest(self): + def getRequest(self) -> BaseRequestModel: return self._request - def hasResponse(self): + def hasResponse(self) -> bool: return self._response is not None - def getResponse(self): + def getResponse(self) -> ResponseModel: + assert self._response is not None return self._response - def process(self, agents, store, notebook): + def process( + self, + agents: Iterable[Union[DataMiningRoutine, DataAnalysis]], + store: DataEntityStore, + notebook: AbstractLabNotebook, + ) -> None: """ Scan through a set of available routines or analyses and see if any can execute the request held by this task. 
If no routines or analyses can handle @@ -63,7 +81,7 @@ def process(self, agents, store, notebook): if selectedAgent is not None: if notebook is not None: notebook.onTaskStart(self, store, selectedAgent) - if self._request.isAnalysisRequestType(): + if isinstance(self._request, AnalysisRequestModel): self._request.fetchDataFromStore(store) self._response = selectedAgent.run(self._request) if notebook is not None: @@ -73,10 +91,10 @@ def process(self, agents, store, notebook): self._response = responseFactory.createFailureResponse( message="No routine/analysis was found that could \ execute the request ({requestType}).".format( - requestType=type(request))) + requestType=type(self._request))) @abstractmethod - def getResponseDescription(self): + def getResponseDescription(self) -> str: """ Generate a string that describes the response to the request in a human-readable way. @@ -89,13 +107,13 @@ class ManagerRepositoryRoutineTask(ManagerTask): This Task class wraps requests and responses for RepositoryRoutines. """ - def __init__(self, projectID, projectName, url, request): + def __init__(self, projectID: str, projectName: str, url: str, request: RepositoryRoutineRequestModel) -> None: super().__init__(request) self._projectID = projectID self._projectName = projectName self._url = url - def getDescription(self): + def getDescription(self) -> str: """ Generates a string that describes the task. 
""" @@ -104,6 +122,7 @@ def getDescription(self): else: nameOrID = self._projectID + assert isinstance(self._request, RepositoryRoutineRequestModel) repositoryLocation = self._request.getRepositoryLocation() if repositoryLocation.isRecognizable(): canonicalRepoNameOrUrl = repositoryLocation.getCanonicalName() @@ -116,22 +135,23 @@ def getDescription(self): requestType=self._request.__class__.__name__ ) - def getProjectID(self): + def getProjectID(self) -> str: return self._projectID - def getProjectName(self): + def getProjectName(self) -> str: return self._projectName - def getURL(self): + def getURL(self) -> str: return self._url - def getResponseDescription(self): - repositoryLocation = self.getRequest().getRepositoryLocation() + def getResponseDescription(self) -> str: + assert isinstance(self._request, RepositoryRoutineRequestModel) + repositoryLocation = self._request.getRepositoryLocation() if repositoryLocation.isRecognizable(): canonicalRepoNameOrUrl = repositoryLocation.getCanonicalName() else: canonicalRepoNameOrUrl = self._url - + assert self._response is not None if self._response.wasSuccessful(): return "✅ Routine ({repoNameOrURL} --> {requestType}) was successful!".format( repoNameOrURL=canonicalRepoNameOrUrl, @@ -149,14 +169,15 @@ class ManagerExternalCommandLineToolTask(ManagerTask): This Task class wraps requests and responses for ExternalCommandLineToolRoutines. """ - def __init__(self, request): + def __init__(self, request: BaseRequestModel) -> None: super().__init__(request) - def getResponseDescription(self): + def getResponseDescription(self) -> str: """ Generate a string that describes the response to the request in a human-readable way. 
""" + assert self._response is not None if self._response.wasSuccessful(): return "✅ External Command Line Tool Request ({requestType}) was successful!".format( requestType=self.getRequest().__class__.__name__) @@ -170,10 +191,11 @@ class ManagerAnalysisTask(ManagerTask): This Task class wraps requests and responses for DataAnalyses. """ - def __init__(self, request): + def __init__(self, request: BaseRequestModel) -> None: super().__init__(request) - def getResponseDescription(self): + def getResponseDescription(self) -> str: + assert self._response is not None if self._response.wasSuccessful(): return "✅ Analysis ({requestType}) was successful!".format( requestType=self.getRequest().__class__.__name__ @@ -194,22 +216,23 @@ class ReposcannerManager: def __init__( self, - notebook=None, - outputDirectory="./", - workspaceDirectory="./", - gui=False): + notebook: Optional[AbstractLabNotebook] = None, + outputDirectory: str = "./", + workspaceDirectory: str = "./", + gui: bool = False, + ) -> None: self._notebook = notebook - self._repositoryRoutines = [] - self._externalCommandLineToolRoutines = [] - self._analyses = [] - self._tasks = [] - self._keychain = None + self._repositoryRoutines: List[RepositoryRoutine] = [] + self._externalCommandLineToolRoutines: List[ExternalCommandLineToolRoutine] = [] + self._analyses: List[DataAnalysis] = [] + self._tasks: List[ManagerRepositoryRoutineTask] = [] + self._keychain: Optional[CredentialKeychain] = None self._outputDirectory = outputDirectory self._workspaceDirectory = workspaceDirectory self._guiModeEnabled = gui self._store = DataEntityStore() - def initializeRoutinesAndAnalyses(self, configData): + def initializeRoutinesAndAnalyses(self, configData: Dict[str, Any]) -> None: """Constructs RepositoryRoutine and DataAnalysis objects that belong to the manager.""" if 'routines' in configData: @@ -224,7 +247,10 @@ def initializeRoutinesAndAnalyses(self, configData): routineName = routineEntry configParameters = None try: - 
routineClazz = getattr(sys.modules[__name__], routineName) + import reposcanner.contrib + import reposcanner.dummy + allRoutines = {**globals(), **reposcanner.contrib.__dict__, **reposcanner.dummy.__dict__} + routineClazz = allRoutines[routineName] routineInstance = routineClazz() routineInstance.setConfigurationParameters(configParameters) @@ -234,7 +260,8 @@ def initializeRoutinesAndAnalyses(self, configData): self._externalCommandLineToolRoutines.append(routineInstance) else: raise TypeError("ReposcannerManager does not know how to \ - handle this routine type: {routineType}".format(type(routineInstance))) + handle this routine type: {routineType}".format( + routineType=type(routineInstance))) except BaseException: raise ValueError( "Failed to instantiate routine matching name {name}".format( @@ -252,7 +279,11 @@ def initializeRoutinesAndAnalyses(self, configData): analysisName = analysisEntry configParameters = None try: - analysisClazz = getattr(sys.modules[__name__], analysisName) +# For dynamic lookup: + import reposcanner.contrib + import reposcanner.dummy + allAnalyses = {**globals(), **reposcanner.contrib.__dict__, **reposcanner.dummy.__dict__} + analysisClazz = allAnalyses[analysisName] analysisInstance = analysisClazz() analysisInstance.setConfigurationParameters(configParameters) self._analyses.append(analysisInstance) @@ -261,32 +292,32 @@ def initializeRoutinesAndAnalyses(self, configData): "Failed to instantiate analysis matching name {name}".format( name=analysisName)) - for routine in self._repositoryRoutines: + for r_routine in self._repositoryRoutines: if self._notebook is not None: - self._notebook.onRoutineCreation(routine) - for routine in self._externalCommandLineToolRoutines: + self._notebook.onRoutineCreation(r_routine) + for cmd_routine in self._externalCommandLineToolRoutines: if self._notebook is not None: - self._notebook.onRoutineCreation(routine) + self._notebook.onRoutineCreation(cmd_routine) for analysis in self._analyses: if 
self._notebook is not None: self._notebook.onAnalysisCreation(analysis) - def addDataEntityToStore(self, entity): + def addDataEntityToStore(self, entity: ReposcannerDataEntity) -> None: """ Allows the user to add additional data to the DataEntityStore prior to execution (e.g. from reposcanner-data) """ self._store.insert(entity) - def getRoutines(self): + def getRoutines(self) -> Sequence[DataMiningRoutine]: """ Provides a list of all routines available for the manager to delgate tasks to. Used for testing purposes. """ - return self._repositoryRoutines + self._externalCommandLineToolRoutines + return (*self._repositoryRoutines, *self._externalCommandLineToolRoutines) - def getRepositoryRoutines(self): + def getRepositoryRoutines(self) -> Sequence[RepositoryRoutine]: """ Provides a list of repository-mining routines available for the manager to delgate tasks to. @@ -294,7 +325,7 @@ def getRepositoryRoutines(self): """ return self._repositoryRoutines - def getExternalCommandLineToolRoutines(self): + def getExternalCommandLineToolRoutines(self) -> Sequence[ExternalCommandLineToolRoutine]: """ Provides a list of external command-line tool routines available for the manager to delgate tasks to. @@ -302,46 +333,51 @@ def getExternalCommandLineToolRoutines(self): """ return self._externalCommandLineToolRoutines - def getAnalyses(self): + def getAnalyses(self) -> Sequence[DataAnalysis]: """ Provides a list of analyses available for the manager to delgate tasks to. Used for testing purposes. 
""" return self._analyses - def isGUIModeEnabled(self): + def isGUIModeEnabled(self) -> bool: return self._guiModeEnabled - def buildTask(self, projectID, projectName, url, routineOrAnalysis): + def buildTask(self, projectID: str, projectName: str, url: str, routineOrAnalysis: Union[DataMiningRoutine, DataAnalysis]) -> ManagerTask: """Constructs a task to hold a request/response pair.""" requestType = routineOrAnalysis.getRequestType() - if requestType.isRoutineRequestType(): - if requestType.isExternalCommandLineToolRequestType(): - request = requestType(outputDirectory=self._outputDirectory) - task = ManagerExternalCommandLineToolTask(request) - return task - else: - if requestType.requiresOnlineAPIAccess(): - request = requestType(repositoryURL=url, - outputDirectory=self._outputDirectory, - keychain=self._keychain) - else: - request = requestType(repositoryURL=url, - outputDirectory=self._outputDirectory, - workspaceDirectory=self._workspaceDirectory) - task = ManagerRepositoryRoutineTask( - projectID=projectID, projectName=projectName, url=url, request=request) - return task - elif requestType.isAnalysisRequestType(): - request = requestType() - task = ManagerAnalysisTask(request) - return task + if issubclass(requestType, ExternalCommandLineToolRoutineRequest): + cmd_request = requestType(outputDirectory=self._outputDirectory) + cmd_task = ManagerExternalCommandLineToolTask(cmd_request) + return cmd_task + elif issubclass(requestType, OnlineRoutineRequest): + online_request = requestType(repositoryURL=url, + outputDirectory=self._outputDirectory, + keychain=self._keychain) + online_task = ManagerRepositoryRoutineTask( + projectID=projectID, projectName=projectName, url=url, request=online_request) + return online_task + elif issubclass(requestType, RepositoryRoutineRequestModel): + repo_request = requestType(repositoryURL=url, + outputDirectory=self._outputDirectory, + workspaceDirectory=self._workspaceDirectory) # type: ignore + repo_task = 
ManagerRepositoryRoutineTask( + projectID=projectID, projectName=projectName, url=url, request=repo_request) + return repo_task + elif issubclass(requestType, AnalysisRequestModel): + analysis_request = requestType() + analysis_task = ManagerAnalysisTask(analysis_request) + return analysis_task else: raise TypeError( "Encountered unrecognized request type when building task: {requestType}.".format( requestType=requestType)) - def prepareTasks(self, repositoryDictionary, credentialsDictionary): + def prepareTasks( + self, + repositoryDictionary: Dict[str, Dict[str, Any]], + credentialsDictionary: Dict[str, Dict[str, str]], + ) -> None: """Interpret the user's inputs so we know what repositories we need to collect data on and how we can access them.""" self._keychain = CredentialKeychain(credentialsDictionary) @@ -357,14 +393,18 @@ def prepareTasks(self, repositoryDictionary, credentialsDictionary): task = self.buildTask(projectID, projectName, url, routine) if self._notebook is not None: self._notebook.onTaskCreation(task) + if not isinstance(task, ManagerRepositoryRoutineTask): + raise TypeError("Tasks must be ManagerRepositoryRoutineTask not {}".format(type(task))) self._tasks.append(task) for analysis in self._analyses: task = self.buildTask(projectID, projectName, url, analysis) if self._notebook is not None: self._notebook.onTaskCreation(task) + if not isinstance(task, ManagerRepositoryRoutineTask): + raise TypeError("Tasks must be ManagerRepositoryRoutineTask not {}".format(type(task))) self._tasks.append(task) - def run(self, repositoriesDataFile, credentialsDataFile, configDataFile): + def run(self, repositoriesDataFile: YAMLData, credentialsDataFile: YAMLData, configDataFile: YAMLData) -> None: """ run() is the primary method that is called by the main function. This method starts Reposcanner's execution. 
@@ -377,15 +417,15 @@ def run(self, repositoriesDataFile, credentialsDataFile, configDataFile): else: self.executeWithGUI() - def executeWithNoGUI(self): + def executeWithNoGUI(self) -> None: """ Plain-text execution mode. """ for task in tqdm(self._tasks): + assert self._notebook is not None + #s: Tuple[Union[DataMiningRoutine, DataAnalysis], ...] = task.process( - self._repositoryRoutines + - self._externalCommandLineToolRoutines + - self._analyses, + (*self._repositoryRoutines, *self._externalCommandLineToolRoutines, *self._analyses), self._store, self._notebook) response = task.getResponse() @@ -394,13 +434,16 @@ def executeWithNoGUI(self): for attachment in response.getAttachments(): print(attachment) for attachment in response.getAttachments(): - self._store.insert(attachment) + if isinstance(attachment, ReposcannerDataEntity): + self._store.insert(attachment) + else: + print("Cannot store attachment of type {attachmentType}".format(attachmentType=str(type(attachment)))) - def executeWithGUI(self): + def executeWithGUI(self) -> None: """ Fancy Curses-based GUI execution mode. 
""" - def centerTextPosition(text, windowWidth): + def centerTextPosition(text: str, windowWidth: int) -> int: half_length_of_text = int(len(text) / 2) middle_column = int(windowWidth / 2) x_position = middle_column - half_length_of_text @@ -442,7 +485,7 @@ def centerTextPosition(text, windowWidth): int(screenWidth * 0.1)) messageWindow.border(2) messageWindow.refresh() - messages = [] + messages: List[str] = [] messageLimit = messageWindowHeight - 4 messages.insert(0, "Reposcanner Initalized") @@ -510,13 +553,16 @@ def centerTextPosition(text, windowWidth): footer.addstr(1, 4, taskDescription, curses.A_BOLD) footer.border(2) footer.refresh() + assert self._notebook is not None currentTask.process( - self._repositoryRoutines + - self._analyses, + (*self._repositoryRoutines, *self._analyses), self._store, self._notebook) for attachment in currentTask.getResponse().getAttachments(): - self._store.insert(attachment) + if isinstance(attachment, ReposcannerDataEntity): + self._store.insert(attachment) + else: + print("Cannot store attachment of type {attachmentType}".format(attachmentType=str(type(attachment)))) messages.insert(0, currentTask.getResponseDescription()) screen.refresh() diff --git a/src/reposcanner/provenance.py b/src/reposcanner/provenance.py index 2a22c43..00d0195 100644 --- a/src/reposcanner/provenance.py +++ b/src/reposcanner/provenance.py @@ -20,9 +20,10 @@ These additional artifacts can be quite useful depending on the application scenario; application developers may choose to capture and manage these separately, but outside of the scope of the provenance application. 
""" -from prov.dot import prov_to_dot -import prov.model as prov +from prov.dot import prov_to_dot # type: ignore +import prov.model as prov # type: ignore from abc import ABC, abstractmethod +import argparse import datetime import json import uuid @@ -30,6 +31,11 @@ import subprocess import os import reposcanner.data as dataEntities +from typing import Any, List, Union, TYPE_CHECKING +from reposcanner.requests import RepositoryRoutineRequestModel, AnalysisRequestModel +from reposcanner.routines import DataMiningRoutine, RepositoryRoutine +from reposcanner.analyses import DataAnalysis +from reposcanner.manager import ManagerRepositoryRoutineTask, ManagerTask """ trungdong/prov, a W3C-compliant provenance Data Model @@ -48,7 +54,7 @@ class ReposcannerRunInformant: # run of Reposcanner. EXECUTIONID = uuid.uuid1().hex - def getReposcannerExecutionID(self): + def getReposcannerExecutionID(self) -> str: """ Return a unique identifier string associated with the current run of Reposcanner. @@ -57,7 +63,7 @@ class is first loaded into memory by the interpreter. """ return ReposcannerRunInformant.EXECUTIONID - def getReposcannerVersion(self): + def getReposcannerVersion(self) -> str: """ Return a string indicating what version of Reposcanner was used for this run. Since we aren't yet versioning releases of the tool, this is the hash of the @@ -65,7 +71,9 @@ def getReposcannerVersion(self): """ try: completedProcess = subprocess.run( - ["git", "log", "--pretty=format:'%h'", "-n 1"]) + ["git", "log", "--pretty=format:'%h'", "-n 1"], + capture_output=True, check=True, text=True, # stdout is None unless captured; a git failure falls through to "UNKNOWN" + ) return completedProcess.stdout except Exception as e: return "UNKNOWN" @@ -78,7 +86,7 @@ class AbstractLabNotebook(ABC): """ @abstractmethod - def onStartup(self, args): + def onStartup(self, args: argparse.Namespace) -> None: """ Called when Reposcanner is first initialized.
@@ -87,14 +95,14 @@ def onStartup(self, args): pass @abstractmethod - def onExit(self): + def onExit(self) -> None: """ Called when Reposcanner has finished execution. """ pass @abstractmethod - def onRoutineCreation(self, routine): + def onRoutineCreation(self, routine: DataMiningRoutine) -> None: """ Called when a RepositoryRoutine object is created during initialization. @@ -103,7 +111,7 @@ def onRoutineCreation(self, routine): pass @abstractmethod - def onAnalysisCreation(self, analysis): + def onAnalysisCreation(self, analysis: DataAnalysis) -> None: """ Called when an DataAnalysis object is created during initialization. @@ -112,7 +120,7 @@ def onAnalysisCreation(self, analysis): pass @abstractmethod - def onTaskCreation(self, task): + def onTaskCreation(self, task: ManagerTask) -> None: """ Called when a ManagerTask object is created. @@ -121,7 +129,12 @@ def onTaskCreation(self, task): pass @abstractmethod - def onTaskStart(self, task, store, agent): + def onTaskStart( + self, + task: ManagerTask, + store: dataEntities.DataEntityStore, + agent: Union[DataMiningRoutine, DataAnalysis], + ) -> None: """ Called when a ManagerTask object is created. @@ -132,7 +145,11 @@ def onTaskStart(self, task, store, agent): pass @abstractmethod - def onTaskCompletion(self, task, agent): + def onTaskCompletion( + self, + task: ManagerTask, + agent: Union[DataMiningRoutine, DataAnalysis], + ) -> None: """ Called when a ManagerTask object has been processed and has received a response. @@ -142,7 +159,7 @@ def onTaskCompletion(self, task, agent): pass @abstractmethod - def publishNotebook(self): + def publishNotebook(self) -> None: """ Output the lab notebook's contents to a file. @@ -159,7 +176,7 @@ class ReposcannerLabNotebook(AbstractLabNotebook): class. 
""" - def __init__(self, notebookOutputDirectory): + def __init__(self, notebookOutputDirectory: str) -> None: """ notebookOutputDirectory: The directory where provenance files should be stored when calling publishNotebook(). @@ -181,20 +198,20 @@ def __init__(self, notebookOutputDirectory): {notebookOutputDirectory}.".format( notebookOutputDirectory=self._notebookOutputDirectory)) - def getJSONRepresentation(self): + def getJSONRepresentation(self) -> Any: """ Returns the underlying Prov document in JSON form for testing purposes. """ serialized = self._document.serialize() return json.loads(serialized) - def getProvnRepresentation(self): + def getProvnRepresentation(self) -> str: """ Returns the underlying Prov document in PROV-N form for testing purposes. """ return self._document.get_provn() - def onStartup(self, args): + def onStartup(self, args: argparse.Namespace) -> None: """ Called when Reposcanner is first initialized. @@ -230,13 +247,13 @@ def onStartup(self, args): self._document.wasInformedBy("rs:ReposcannerManager", credentialsListEntity) self._document.wasInformedBy("rs:ReposcannerManager", configListEntity) - def onExit(self): + def onExit(self) -> None: """ Called when Reposcanner has finished execution. """ pass - def onRoutineCreation(self, routine): + def onRoutineCreation(self, routine: DataMiningRoutine) -> None: """ Called when a RepositoryRoutine or ExternalCommandLineToolRoutine object is created during initialization. @@ -248,7 +265,7 @@ def onRoutineCreation(self, routine): clazz=routine.__class__.__name__)) self._document.actedOnBehalfOf(routine, "rs:ReposcannerManager") - def onAnalysisCreation(self, analysis): + def onAnalysisCreation(self, analysis: DataAnalysis) -> None: """ Called when an DataAnalysis object is created during initialization. 
@@ -259,14 +276,13 @@ def onAnalysisCreation(self, analysis): clazz=analysis.__class__.__name__)) self._document.actedOnBehalfOf(analysis, "rs:ReposcannerManager") - def onTaskCreation(self, task): + def onTaskCreation(self, task: ManagerTask) -> None: """ Called when a ManagerTask object is created. task: The ManagerTask object. """ - request = task.getRequest() - if request.isRoutineRequestType(): + if isinstance(task, ManagerRepositoryRoutineTask): task = self._document.activity("rs:task{taskid}".format(taskid=id(task)), other_attributes=( ('rs:requestType', task.getRequestClassName()), ('rs:projectID', task.getProjectID()), @@ -282,7 +298,12 @@ def onTaskCreation(self, task): self._document.wasGeneratedBy("rs:ReposcannerManager", task) - def onTaskStart(self, task, store, agent): + def onTaskStart( + self, + task: ManagerTask, + store: dataEntities.DataEntityStore, + agent: Union[DataMiningRoutine, DataAnalysis], + ) -> None: """ Called when a ManagerTask object is created. @@ -300,7 +321,7 @@ def onTaskStart(self, task, store, agent): # If the request is an analysis request, we can probe the request to see which # files it intends to grab from the data store. request = task.getRequest() - if request.isAnalysisRequestType(): + if isinstance(request, AnalysisRequestModel): filesToBeUsedInAnalysis = store.getByCriteria(request.getDataCriteria()) for entity in filesToBeUsedInAnalysis: entityID = None @@ -311,7 +332,7 @@ def onTaskStart(self, task, store, agent): objID=id(entity)) self._document.usage(taskID, entityID) - def logAdditionalDataEntity(self, entity): + def logAdditionalDataEntity(self, attachment: dataEntities.ReposcannerDataEntity) -> None: """ Convenience method added to enable us to log reposcanner-data files added to the data store at start-up. 
@@ -320,12 +341,16 @@ def logAdditionalDataEntity(self, entity): dataEntity = self._document.entity(dataEntityID, ( (prov.PROV_TYPE, "File"), ('rs:executionID', attachment.getReposcannerExecutionID()), - ('rs:path', attachment.getFilePath()), + ('rs:path', str(attachment.getFilePath())), ('rs:creator', attachment.getCreator()), - ('rs:md5hash', str(entity.getMD5Hash())), + ('rs:md5hash', str(attachment.getMD5Hash())), )) - def onTaskCompletion(self, task, agent): + def onTaskCompletion( + self, + task: ManagerTask, + agent: Union[DataMiningRoutine, DataAnalysis], + ) -> None: """ Called when a ManagerTask object has been processed and has received a response. @@ -352,7 +377,7 @@ def onTaskCompletion(self, task, agent): dataEntity = self._document.entity(dataEntityID, ( (prov.PROV_TYPE, "File"), ('rs:executionID', attachment.getReposcannerExecutionID()), - ('rs:path', attachment.getFilePath()), + ('rs:path', str(attachment.getFilePath())), ('rs:creator', attachment.getCreator()), ('rs:dateCreated', str(attachment.getDateCreated())), ('rs:md5hash', str(md5Hash)), @@ -379,7 +404,7 @@ def onTaskCompletion(self, task, agent): self._document.wasGeneratedBy(dataEntityID, taskID) self._document.wasAttributedTo(dataEntityID, agentID) - def publishNotebook(self): + def publishNotebook(self) -> None: """ Output the lab notebook's contents to a file. @@ -425,7 +450,7 @@ def publishNotebook(self): versionInfo=versionInfo, executionID=executionID)) # TODO: Collect all the names of the routines/analyses used. - routinesAndAnalyses = [] + routinesAndAnalyses: List[str] = [] markdownFile.write("## Reposcanner\n") markdownFile.write( diff --git a/src/reposcanner/reposcanner.py b/src/reposcanner/reposcanner.py index ea17761..bfd79af 100644 --- a/src/reposcanner/reposcanner.py +++ b/src/reposcanner/reposcanner.py @@ -13,12 +13,16 @@ # TODO: Update this to be more generic (e.g. allow for reloading of data generated on) previous runs. 
# If the file doesn't have readable metadata associated with it, we can just say the creator is # "external" or unlisted. -def loadReposcannerData(reposcannerDataDirectory, notebook, manager): +def loadReposcannerData( + reposcannerDataDirectory: str, + notebook: provenance.ReposcannerLabNotebook, + manager: ReposcannerManager, +) -> None: """ Read in additional data files held by bssw-psip/reposcanner-data and add them to the manager's data store. """ - def setupDataEntity(path): + def setupDataEntity(path: str) -> data.ReposcannerDataEntity: informant = provenance.ReposcannerRunInformant() dataEntityFactory = data.DataEntityFactory() dataEntity = dataEntityFactory.createAnnotatedCSVData(path) @@ -50,12 +54,12 @@ def setupDataEntity(path): filePath = "{datadir}/{name}".format( datadir=reposcannerDataDirectory, name=fileName) if os.path.exists(filePath): - dataEntity = setupDataEntity(path) + dataEntity = setupDataEntity(filePath) manager.addDataEntityToStore(dataEntity) notebook.logAdditionalDataEntity(dataEntity) -def scannerMain(args): +def scannerMain(args: argparse.Namespace) -> None: """ The master routine for Reposcanner. """ @@ -127,7 +131,7 @@ def scannerMain(args): """ -def run(): +def run() -> None: """Calls :func:`scannerMain` passing the CLI arguments extracted from :obj:`sys.argv` This function can be used as entry point to create console scripts with setuptools. diff --git a/src/reposcanner/requests.py b/src/reposcanner/requests.py index 59503bc..96f4b50 100644 --- a/src/reposcanner/requests.py +++ b/src/reposcanner/requests.py @@ -1,5 +1,7 @@ import datetime import reposcanner.git as gitEntities +from reposcanner.data import ReposcannerDataEntity, DataEntityStore +from typing import List, Optional, Callable import os from pathlib import Path from abc import ABC, abstractmethod @@ -11,36 +13,23 @@ class BaseRequestModel: form of a request model which routines/analyses understand. 
""" - def __init__(self): - self._errors = [] + def __init__(self) -> None: + self._errors: List[str] = [] - def addError(self, message): + def addError(self, message: str) -> None: self._errors.append(message) - def hasErrors(self): + def hasErrors(self) -> bool: return len(self._errors) > 0 - def getErrors(self): + def getErrors(self) -> List[str]: return self._errors - @classmethod - def isRoutineRequestType(cls): - return False - - @classmethod - def isAnalysisRequestType(cls): - return False - class AnalysisRequestModel(BaseRequestModel): - - @classmethod - def isAnalysisRequestType(cls): - return True - - def __init__(self, outputDirectory="./"): + def __init__(self, outputDirectory: str = "./") -> None: super().__init__() - self._data = [] + self._data: List[ReposcannerDataEntity] = [] try: self._outputDirectory = outputDirectory if not os.path.isdir( @@ -62,10 +51,10 @@ def __init__(self, outputDirectory="./"): {outputDirectory}: {exception}".format(outputDirectory=self._outputDirectory, exception=exception)) - def getOutputDirectory(self): + def getOutputDirectory(self) -> str: return self._outputDirectory - def criteriaFunction(self, entity): + def criteriaFunction(self, entity: ReposcannerDataEntity) -> bool: """ Classes that inherit from AnalysisRequestModel must override the criteriaFunction to describe the data @@ -81,7 +70,7 @@ def criteriaFunction(self, entity): """ return True - def getDataCriteria(self): + def getDataCriteria(self) -> Callable[[ReposcannerDataEntity], bool]: """ This is called to get the criteria function, which is passed to DataEntityStore.getByCriteria() to retrieve the data which @@ -89,7 +78,7 @@ def getDataCriteria(self): """ return self.criteriaFunction - def fetchDataFromStore(self, store): + def fetchDataFromStore(self, store: DataEntityStore) -> None: """ This is called by ManagerTask.process() prior to running an analysis request. It loads any data that fits the request's criteria into the request. 
@@ -99,7 +88,7 @@ def fetchDataFromStore(self, store): for entity in store.getByCriteria(self.getDataCriteria()): self._data.append(entity) - def getData(self): + def getData(self) -> List[ReposcannerDataEntity]: """ Get any stored data associated with this request. Called by the DataAnalysis instance responsible for handling the request. @@ -113,7 +102,7 @@ class ExternalCommandLineToolRoutineRequest(BaseRequestModel): form of a request model which repository-mining routines understand. """ - def __init__(self, outputDirectory): + def __init__(self, outputDirectory: str) -> None: """ parameters: outputDirectory (@input): The directory where files generated @@ -142,17 +131,9 @@ def __init__(self, outputDirectory): {outputDirectory}: {exception}".format(outputDirectory=self._outputDirectory, exception=exception)) - def getOutputDirectory(self): + def getOutputDirectory(self) -> str: return self._outputDirectory - @classmethod - def isRoutineRequestType(cls): - return True - - @classmethod - def isExternalCommandLineToolRequestType(cls): - return True - class RepositoryRoutineRequestModel(BaseRequestModel): """ @@ -160,7 +141,7 @@ class RepositoryRoutineRequestModel(BaseRequestModel): form of a request model which repository-mining routines understand. 
""" - def __init__(self, repositoryURL, outputDirectory): + def __init__(self, repositoryURL: str, outputDirectory: str) -> None: """ parameters: repositoryURL (@input): A URL to a version control @@ -172,7 +153,6 @@ def __init__(self, repositoryURL, outputDirectory): """ super().__init__() factory = gitEntities.GitEntityFactory() - self._repositoryLocation = None try: self._repositoryLocation = factory.createRepositoryLocation( url=repositoryURL) @@ -207,20 +187,12 @@ def __init__(self, repositoryURL, outputDirectory): {outputDirectory}: {exception}".format(outputDirectory=self._outputDirectory, exception=exception)) - def getRepositoryLocation(self): + def getRepositoryLocation(self) -> gitEntities.RepositoryLocation: return self._repositoryLocation - def getOutputDirectory(self): + def getOutputDirectory(self) -> str: return self._outputDirectory - @classmethod - def isRoutineRequestType(cls): - return True - - @classmethod - def isExternalCommandLineToolRequestType(cls): - return False - class OnlineRoutineRequest(RepositoryRoutineRequestModel): """ @@ -228,22 +200,15 @@ class OnlineRoutineRequest(RepositoryRoutineRequestModel): Request classes for OnlineRepositoryRoutine should inherit from this class. """ - @classmethod - def requiresOnlineAPIAccess(cls): - """ - Tells the caller whether this request requires access to an online - version control API. 
- """ - return True - def __init__( self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): + repositoryURL: str, + outputDirectory: str, + username: Optional[str] = None, + password: Optional[str] = None, + token: Optional[str] = None, + keychain: Optional[gitEntities.CredentialKeychain] = None, + ) -> None: """ Additional Parameters: @@ -259,8 +224,8 @@ def __init__( """ super().__init__(repositoryURL, outputDirectory) - self._credentials = None factory = gitEntities.GitEntityFactory() + self._credentials: Optional[gitEntities.VersionControlPlatformCredentials] if keychain is None: try: self._credentials = factory.createVersionControlPlatformCredentials( @@ -268,15 +233,20 @@ def __init__( password=password, token=token) except Exception as exception: - self.addError("Encountered an unexpected exception \ - while constructing credentials object: {exception}".format(exception=exception)) + self.addError( + "Encountered an unexpected exception while constructing " + "credentials object: {exception}".format(exception=exception)) else: self._credentials = keychain.lookup(self.getRepositoryLocation()) if self._credentials is None: - self.addError("Failed to find a matching set of credentials \ - on the keychain (out of {numberOfCredentials} credentials) corresponding to the URL of the repository ({URL}).".format(numberOfCredentials=len(keychain), URL=repositoryURL)) + self.addError( + "Failed to find a matching set of credentials on the " + "keychain (out of {numberOfCredentials} credentials) " + "corresponding to the URL of the repository ({URL})." 
+ .format(numberOfCredentials=len(keychain), URL=repositoryURL) + ) - def getCredentials(self): + def getCredentials(self) -> Optional[gitEntities.VersionControlPlatformCredentials]: return self._credentials @@ -285,16 +255,7 @@ class OfflineRoutineRequest(RepositoryRoutineRequestModel): The base class for requests to routines that operate on an offline clone to compute results. Request classes for OfflineRepositoryRoutine should inherit from this class. """ - - @classmethod - def requiresOnlineAPIAccess(cls): - """ - Tells the caller whether this request requires access to an online - version control API. - """ - return False - - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): + def __init__(self, repositoryURL: str, outputDirectory: str, workspaceDirectory: str) -> None: """ Additional Parameters: @@ -310,18 +271,18 @@ def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): self.addError( "The workspace directory {workspaceDirectory} either does not exist or \ is not a valid directory.".format( - outputDirectory=self._workspaceDirectory)) + workspaceDirectory=self._workspaceDirectory)) except Exception as exception: self.addError("Encountered an unexpected exception \ while parsing workspace directory \ {workspaceDirectory}: {exception}".format(workspaceDirectory=self._workspaceDirectory, exception=exception)) - def getCloneDirectory(self): + def getCloneDirectory(self) -> Path: return Path(self._workspaceDirectory) \ / "{repoOwner}_{repoName}".format( repoOwner=self.getRepositoryLocation().getOwner(), repoName=self.getRepositoryLocation().getRepositoryName()) - def getWorkspaceDirectory(self): + def getWorkspaceDirectory(self) -> str: return self._workspaceDirectory diff --git a/src/reposcanner/response.py b/src/reposcanner/response.py index e9cccf9..36ca152 100644 --- a/src/reposcanner/response.py +++ b/src/reposcanner/response.py @@ -1,6 +1,13 @@ +# This future import allows us to reference a class in type annotations 
before it is declared. +from __future__ import annotations import abc from enum import Enum import collections +from typing import Optional, List, Iterable, Union, Any +from reposcanner.data import ReposcannerDataEntity + + +AttachmentType = Union[ReposcannerDataEntity, str, Exception] class ResponseStatus(Enum): @@ -16,12 +23,20 @@ class ResponseFactory: A factory for churning out response model objects. Classes should use this factory to construct responses. """ - def createSuccessResponse(self, message=None, attachments=None): + def createSuccessResponse( + self, + message: Optional[str] = None, + attachments: Union[None, AttachmentType, Iterable[AttachmentType]] = None, + ) -> ResponseModel: return ResponseModel(status=ResponseStatus.SUCCESS, message=message, attachments=attachments) - def createFailureResponse(self, message=None, attachments=None): + def createFailureResponse( + self, + message: Optional[str] = None, + attachments: Union[None, AttachmentType, Iterable[AttachmentType]] = None, + ) -> ResponseModel: return ResponseModel(status=ResponseStatus.FAILURE, message=message, attachments=attachments) @@ -34,7 +49,12 @@ class ResponseModel: of how that data is presented to a client. 
""" - def __init__(self, status, message=None, attachments=None): + def __init__( + self, + status: ResponseStatus, + message: Optional[str] = None, + attachments: Union[None, AttachmentType, Iterable[AttachmentType]] = None, + ) -> None: """ Parameters ---------- @@ -50,37 +70,41 @@ def __init__(self, status, message=None, attachments=None): """ self._status = status self._message = message - self._attachments = [] + self._attachments: List[AttachmentType] = [] - def isIterable(obj): + def isIterable(obj: Any) -> bool: try: iter(obj) return True except TypeError as e: return False if attachments is not None: - if isIterable(attachments) and not isinstance(attachments, str): + if isinstance(attachments, Iterable) and isIterable(attachments) and not isinstance(attachments, str): for attachment in attachments: self._attachments.append(attachment) - else: + elif isinstance(attachments, (str, ReposcannerDataEntity, Exception)): self._attachments.append(attachments) + else: + raise TypeError("Invalid attachment type {attachmentType}".format( + attachmentType=str(type(attachments)))) + - def hasMessage(self): + def hasMessage(self) -> bool: return self._message is not None - def getMessage(self): + def getMessage(self) -> Optional[str]: return self._message - def hasAttachments(self): + def hasAttachments(self) -> bool: return len(self._attachments) != 0 - def getAttachments(self): + def getAttachments(self) -> List[AttachmentType]: return self._attachments - def addAttachment(self, attachment): + def addAttachment(self, attachment: AttachmentType) -> None: self._attachments.append(attachment) - def wasSuccessful(self): + def wasSuccessful(self) -> bool: if self._status == ResponseStatus.SUCCESS: return True else: diff --git a/src/reposcanner/routines.py b/src/reposcanner/routines.py index 5c7f4f8..2336bd6 100644 --- a/src/reposcanner/routines.py +++ b/src/reposcanner/routines.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod import urllib3 import os -import pygit2 
-from reposcanner.git import GitEntityFactory, RepositoryLocation -from reposcanner.response import ResponseFactory +import pygit2 # type: ignore +from reposcanner.git import GitEntityFactory, RepositoryLocation, VCSAPISessionCreator, Session +from reposcanner.response import ResponseFactory, ResponseModel +from reposcanner.requests import BaseRequestModel, ExternalCommandLineToolRoutineRequest, RepositoryRoutineRequestModel, OfflineRoutineRequest, OnlineRoutineRequest +from typing import Optional, Dict, Any, Type, TYPE_CHECKING class DataMiningRoutine(ABC): @@ -11,7 +13,7 @@ class DataMiningRoutine(ABC): The abstract base class for all data mining routines. """ - def canHandleRequest(self, request): + def canHandleRequest(self, request: BaseRequestModel) -> bool: """ Returns True if the routine is capable of handling the request (i.e. the RequestModel is of the type that the routine expects), and False otherwise. @@ -22,14 +24,14 @@ def canHandleRequest(self, request): return False @abstractmethod - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: """ Returns the class object for the routine's companion request type. """ pass @abstractmethod - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: """ Contains the code for executing the data mining operations. @@ -39,7 +41,7 @@ def execute(self, request): """ pass - def run(self, request): + def run(self, request: BaseRequestModel) -> ResponseModel: """ Encodes the workflow of a DataMiningRoutine object. The client only needs to run this method in order to get results. @@ -47,7 +49,7 @@ def run(self, request): response = self.execute(request) return response - def hasConfigurationParameters(self): + def hasConfigurationParameters(self) -> bool: """ Checks whether the routine object was passed configuration parameters, whether valid or not. 
Routines are not required to do anything with parameters @@ -59,7 +61,7 @@ def hasConfigurationParameters(self): except BaseException: return False - def getConfigurationParameters(self): + def getConfigurationParameters(self) -> Optional[Dict[str, Any]]: """ Returns the configuration parameters assigned to the routine. """ @@ -69,7 +71,7 @@ def getConfigurationParameters(self): except BaseException: return None - def setConfigurationParameters(self, configParameters): + def setConfigurationParameters(self, configParameters: Dict[str, Any]) -> None: """ Assigns configuration parameters to a newly created routine. """ @@ -88,15 +90,7 @@ class ExternalCommandLineToolRoutine(DataMiningRoutine): provenance for their use. """ - @abstractmethod - def isExternalToolAvailable(self): - """ - Checks to see whether the tool can be called on the command-line. - This method should return True if so, False if not. - """ - pass - - def commandLineToolImplementation(self, request): + def commandLineToolImplementation(self, request: BaseRequestModel) -> ResponseModel: """ This method should contain an implementation that calls the command-line tool and handles any information it gets back from that tool. 
@@ -110,16 +104,16 @@ def commandLineToolImplementation(self, request): message="This routine has no implementation available\ to call the command-line tool.") - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, ExternalCommandLineToolRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): return responseFactory.createFailureResponse( message="The request had errors in it and cannot be processed.", attachments=request.getErrors()) - elif not self.isExternalToolAvailable(): + elif not isinstance(request, ExternalCommandLineToolRoutineRequest): return responseFactory.createFailureResponse( message="The command-line tool required by this routine is not available or\ is otherwise unable to be called.") @@ -137,14 +131,14 @@ class OfflineRepositoryRoutine(RepositoryRoutine): Class that encapsulates the stages of a PyGit2-based analysis procedure operating on a clone of a repository. """ - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: """ The Offline routine execute() method delegates responsibility for performing the routine to the offlineImplementation() method. Subclasses of this class are responsible for overriding that methods. 
""" responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, OfflineRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): @@ -154,7 +148,7 @@ def execute(self, request): else: try: if not os.path.exists(request.getCloneDirectory()): - def init_remote(repo, name, url): + def init_remote(repo, name: str, url: str): # Create the remote with a mirroring url remote = repo.remotes.create( name, url, "+refs/heads/*:refs/heads/*") @@ -163,6 +157,7 @@ def init_remote(repo, name, url): #repo.config[mirror_var] = True # Return the remote, which pygit2 will use to perform the clone return remote + session = pygit2.clone_repository( request.getRepositoryLocation().getURL(), request.getCloneDirectory(), @@ -177,8 +172,9 @@ def init_remote(repo, name, url): return responseFactory.createFailureResponse( message="OfflineRepositoryRoutine Encountered an unexpected exception ({etype}).".format( etype=type(e)), attachments=[e]) + - def offlineImplementation(self, request, session): + def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: """ This method should contain the PyGit2-based implementation of the routine. By default, it'll return a failure response. Subclasses are responsible for @@ -198,23 +194,23 @@ class OnlineRepositoryRoutine(RepositoryRoutine): Class that encapsulates the stages of an PyGitHub-based analysis procedure operating on the GitHub API. 
""" - def __init__(self): + def __init__(self) -> None: factory = GitEntityFactory() compositeCreator = factory.createVCSAPISessionCompositeCreator() githubCreator = factory.createGitHubAPISessionCreator() gitlabCreator = factory.createGitlabAPISessionCreator() compositeCreator.addChild(githubCreator) compositeCreator.addChild(gitlabCreator) - self._sessionCreator = compositeCreator + self._sessionCreator: VCSAPISessionCreator = compositeCreator - def execute(self, request): + def execute(self, request: BaseRequestModel) -> ResponseModel: """ The Online routine execute() method delegates responsibility for performing the routine to platform-API-specific methods. Subclasses of this class are responsible for overriding those methods. """ responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, OnlineRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): @@ -230,6 +226,10 @@ def execute(self, request): platform = request.getRepositoryLocation().getVersionControlPlatform() repositoryLocation = request.getRepositoryLocation() credentials = request.getCredentials() + if credentials is None: + return responseFactory.createFailureResponse( + message="request.getCredentials() returned None", + ) try: if platform == RepositoryLocation.VersionControlPlatform.GITHUB: return self.githubImplementation( @@ -254,18 +254,18 @@ def execute(self, request): attachments=[e]) @property - def sessionCreator(self): + def sessionCreator(self) -> VCSAPISessionCreator: """We expose this attribute for testing/validation purposes. 
Normally the session creator isn't touched after construction.""" return self._sessionCreator @sessionCreator.setter - def sessionCreator(self, sessionCreator): + def sessionCreator(self, sessionCreator: VCSAPISessionCreator) -> None: """We expose this attribute for testing/validation purposes. Normally the session creator isn't touched after construction.""" self._sessionCreator = sessionCreator - def githubImplementation(self, request, session): + def githubImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: """ This method should contain the GitHub API implementation of the routine. By default, it'll return a failure response. Subclasses are responsible for @@ -279,7 +279,7 @@ def githubImplementation(self, request, session): message="This routine has no implementation available \ to handle a GitHub repository.") - def gitlabImplementation(self, request, session): + def gitlabImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: """ This method should contain the GitHub API implementation of the routine. By default, it'll return a failure response. Subclasses are responsible for @@ -293,7 +293,7 @@ def gitlabImplementation(self, request, session): message="This routine has no implementation available \ to handle a Gitlab repository.") - def bitbucketImplementation(self, request, session): + def bitbucketImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: """ This method should contain the GitHub API implementation of the routine. By default, it'll return a failure response. 
Subclasses are responsible for diff --git a/src/reposcanner/util.py b/src/reposcanner/util.py new file mode 100644 index 0000000..1bc9c87 --- /dev/null +++ b/src/reposcanner/util.py @@ -0,0 +1,8 @@ +from typing import Optional + + +def replaceNoneWithEmptyString(value: Optional[str]) -> str: + if value is None: + return "" + else: + return value diff --git a/tests/test_baseRoutines.py b/tests/test_baseRoutines.py index c8b5ee7..04ca264 100644 --- a/tests/test_baseRoutines.py +++ b/tests/test_baseRoutines.py @@ -5,6 +5,28 @@ import reposcanner.response as responses +# TODO: These tests use monkeypatching in a way that can affect other tests. +# The test will monkeypatch a method, do an assertion, and then restore the method. +# However, when the assertion fails, the method never gets restored. +# Then, unrelated tests will start to fail, which makes it harder to localize the error. +# Where possible, these tests should monkeypatch individual objects rather than classes. +# Rather than: +# +# originalMethod = Clazz.method +# Clazz.method = newMethod +# obj = Clazz() +# assert foo(obj) +# Clazz.method = originalMethod +# +# I propose: +# +# obj = Clazz() +# obj.method = newMethod +# assert foo(obj) +# +# Clazz remains unaffected. 
+ + def test_RepositoryRoutine_isConstructibleWithMockImplementation(mocker): mocker.patch.multiple(routines.RepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.RepositoryRoutine() @@ -103,7 +125,7 @@ def test_ExternalCommandLineToolRoutine_canSetConfigurationParameters(mocker): assert(genericRoutine.getConfigurationParameters() == configurationParameters) -def test_OnlineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): +def test_OfflineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OfflineRepositoryRoutine() @@ -136,8 +158,8 @@ def canAlwaysHandleRequest(self, request): routines.OfflineRepositoryRoutine.canHandleRequest = canAlwaysHandleRequest genericRoutine = routines.OfflineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OfflineRoutineRequest( + repositoryURL="https://github.com/owner/repo", outputDirectory="./out", workspaceDirectory="./workspace") genericRequest.addError(message="Something has gone horribly wrong.") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) @@ -161,8 +183,8 @@ def canNeverHandleRequest(self, request): routines.OnlineRepositoryRoutine.canHandleRequest = canNeverHandleRequest genericRoutine = routines.OnlineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OnlineRoutineRequest( + repositoryURL="https://github.com/owner/repo", outputDirectory="./out") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) assert(response.hasMessage()) @@ -179,7 +201,7 @@ def canAlwaysHandleRequest(self, request): routines.OnlineRepositoryRoutine.canHandleRequest = 
canAlwaysHandleRequest genericRoutine = routines.OnlineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( + genericRequest = requests.OnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./") genericRequest.addError(message="Something has gone horribly wrong.") response = genericRoutine.run(genericRequest) @@ -203,11 +225,12 @@ def canAlwaysHandleRequest(self, request): emptyAPICreator = gitEntityFactory.createVCSAPISessionCompositeCreator() genericRoutine.sessionCreator = emptyAPICreator - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OnlineRoutineRequest( + repositoryURL="https://github.com/owner/repo", outputDirectory="./", username="foo", password="bar") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) assert(response.hasMessage()) + print(response.getMessage(), response.getAttachments()) assert("to handle the platform of the repository" in response.getMessage()) routines.OnlineRepositoryRoutine.canHandleRequest = originalCanHandleRequest diff --git a/tests/test_data.py b/tests/test_data.py index 82c7dd1..d0acaa4 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,3 +1,4 @@ +from pathlib import Path import pytest import reposcanner.data as data import datetime @@ -14,7 +15,7 @@ def test_AnnotatedCSVData_isConstructibleByFactory(): def test_AnnotatedCSVData_canGetFilePath(): dataEntity = data.AnnotatedCSVData("test.csv") - assert(dataEntity.getFilePath() == "test.csv") + assert(dataEntity.getFilePath() == Path("test.csv")) def test_AnnotatedCSVData_canGetKeysForMetadataAfterConstruction(): @@ -159,7 +160,7 @@ def test_YAMLData_isDirectlyConstructible(): dataEntity = data.YAMLData("test.yaml") -def test_AnnotatedCSVData_isConstructibleByFactory(): +def test_AnnotatedYAMLData_isConstructibleByFactory(): factory = data.DataEntityFactory() 
factory.createYAMLData("test.yaml") diff --git a/tests/test_manager.py b/tests/test_manager.py index f56b1f5..e4fa9e6 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -29,7 +29,7 @@ def test_ManagerAnalysisTask_isDirectlyConstructible(): task = management.ManagerAnalysisTask(request=requests.AnalysisRequestModel()) -def test_ManagerRepositoryRoutineTask_isConstructibleByFactory(): +def test_ManagerAnalysisTask_isConstructibleByFactory(): factory = management.TaskFactory() task = factory.createManagerAnalysisTask(request=requests.AnalysisRequestModel()) diff --git a/tests/test_requests.py b/tests/test_requests.py index 0ff1df9..50a7293 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -9,12 +9,6 @@ def test_AnalysisRequestModel_isDirectlyConstructible(): analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") -def test_AnalysisRequestModel_isAnAnalysisRequestType(): - analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") - assert(analysisRequest.isAnalysisRequestType()) - assert(not analysisRequest.isRoutineRequestType()) - - def test_AnalysisRequestModel_hasNoErrorsForValidInput(): analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") assert(not analysisRequest.hasErrors()) @@ -52,13 +46,6 @@ def test_ExternalCommandLineToolRoutineRequest_isDirectlyConstructible(): requests.ExternalCommandLineToolRoutineRequest(outputDirectory="./") -def test_ExternalCommandLineToolRoutineRequest_isARoutineRequestType(): - commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( - outputDirectory="./") - assert(not commandLineToolRequest.isAnalysisRequestType()) - assert(commandLineToolRequest.isRoutineRequestType()) - - def test_ExternalCommandLineToolRoutineRequest_hasNoErrorsForValidInput(): commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( outputDirectory="./") @@ -87,13 +74,6 @@ def test_RepositoryRoutineRequestModel_isDirectlyConstructible(): 
outputDirectory="./") -def test_AnalysisRequestModel_isARoutineRequestType(): - routineRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") - assert(not routineRequest.isAnalysisRequestType()) - assert(routineRequest.isRoutineRequestType()) - - def test_RepositoryRoutineRequestModel_hasNoErrorsForValidInput(): request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") From f1b1a857a6f7b0efa59a7d912a673517afb84845 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 12:40:55 -0500 Subject: [PATCH 02/12] Deduplicate test names --- tests/test_baseRoutines.py | 4 ++-- tests/test_data.py | 2 +- tests/test_manager.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_baseRoutines.py b/tests/test_baseRoutines.py index c8b5ee7..bf70128 100644 --- a/tests/test_baseRoutines.py +++ b/tests/test_baseRoutines.py @@ -103,8 +103,8 @@ def test_ExternalCommandLineToolRoutine_canSetConfigurationParameters(mocker): assert(genericRoutine.getConfigurationParameters() == configurationParameters) -def test_OnlineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): - mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) +def test_OfflineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): + mocker.patch.multiple(routines.OfflineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OfflineRepositoryRoutine() diff --git a/tests/test_data.py b/tests/test_data.py index 82c7dd1..d44da23 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -159,7 +159,7 @@ def test_YAMLData_isDirectlyConstructible(): dataEntity = data.YAMLData("test.yaml") -def test_AnnotatedCSVData_isConstructibleByFactory(): +def test_AnnotatedYAMLData_isConstructibleByFactory(): factory = data.DataEntityFactory() factory.createYAMLData("test.yaml") diff --git 
a/tests/test_manager.py b/tests/test_manager.py index f56b1f5..e4fa9e6 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -29,7 +29,7 @@ def test_ManagerAnalysisTask_isDirectlyConstructible(): task = management.ManagerAnalysisTask(request=requests.AnalysisRequestModel()) -def test_ManagerRepositoryRoutineTask_isConstructibleByFactory(): +def test_ManagerAnalysisTask_isConstructibleByFactory(): factory = management.TaskFactory() task = factory.createManagerAnalysisTask(request=requests.AnalysisRequestModel()) From 5e0249b241179b9ad4d59422e2a5659cdb5ad8d4 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 13:31:00 -0500 Subject: [PATCH 03/12] Fix GitHub CI --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8473dc1..662bc31 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip python -m pip install flake8 pytest pytest-cov python -m pip install --upgrade setuptools setuptools_scm wheel - python setup.py install + python -m pip install . 
#- name: Lint with flake8 # run: | # # stop the build if there are Python syntax errors or undefined names From d0cd008d8dd931f455eeb2f8d436534451df68f8 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 13:18:58 -0500 Subject: [PATCH 04/12] Reduce code --- loggedentitytest.csv | 10 +++++ src/reposcanner/contrib.py | 79 ++++++-------------------------------- src/reposcanner/dummy.py | 19 +-------- src/reposcanner/git.py | 15 ++------ src/reposcanner/util.py | 5 +++ 5 files changed, 32 insertions(+), 96 deletions(-) create mode 100644 loggedentitytest.csv create mode 100644 src/reposcanner/util.py diff --git a/loggedentitytest.csv b/loggedentitytest.csv new file mode 100644 index 0000000..b516f0f --- /dev/null +++ b/loggedentitytest.csv @@ -0,0 +1,10 @@ +#executionid 346611c80a3511ee91b254e1adc488c5 +#creator ContributorAccountListRoutine +#datecreated 2023-06-13 +#projectid PROJID +#projectname SciKit +#url https://github.com/scikit/scikit +#names Login Name;Actual Name;Email(s) +#datatypes str;str;str +johnsmith,John Smith,jsmith@gmail.com +alicejones,Alice Jones,alice@llnl.gov diff --git a/src/reposcanner/contrib.py b/src/reposcanner/contrib.py index 5417ad1..7bc9748 100644 --- a/src/reposcanner/contrib.py +++ b/src/reposcanner/contrib.py @@ -4,6 +4,7 @@ from reposcanner.response import ResponseFactory from reposcanner.provenance import ReposcannerRunInformant from reposcanner.data import DataEntityFactory +from reposcanner.util import replaceNoneWithEmptyString as _replaceNoneWithEmptyString import pygit2 from pathlib import Path @@ -21,8 +22,7 @@ class CommitInfoMiningRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class CommitInfoMiningRoutine(OfflineRepositoryRoutine): @@ -126,16 +126,8 @@ def _getStats(commit): changes['files'] += diff.stats.files_changed return changes - def 
_replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - for commit in session.walk(session.head.target, pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_TOPOLOGICAL): - extractedCommitData = {} - # The person who originally made the change and when they made it, a # pygit2.Signature. author = commit.author @@ -200,21 +192,7 @@ def _replaceNoneWithEmptyString(value): class OnlineCommitAuthorshipRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): @@ -226,13 +204,6 @@ class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): def getRequestType(self): return OnlineCommitAuthorshipRoutineRequest - def githubImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -270,12 +241,6 @@ def _replaceNoneWithEmptyString(value): message="OnlineCommitAuthorshipRoutine completed!", attachments=output) def gitlabImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -479,8 +444,7 @@ def execute(self, request): class OfflineCommitCountsRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class OfflineCommitCountsRoutine(OfflineRepositoryRoutine): @@ -527,21 +491,7 @@ def 
offlineImplementation(self, request, session): class ContributorAccountListRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class ContributorAccountListRoutine(OnlineRepositoryRoutine): @@ -554,12 +504,6 @@ class ContributorAccountListRoutine(OnlineRepositoryRoutine): def getRequestType(self): return ContributorAccountListRoutineRequest - def _replaceNoneWithEmptyString(self, value): - if value is None: - return "" - else: - return value - def githubImplementation(self, request, session): factory = DataEntityFactory() output = factory.createAnnotatedCSVData( @@ -578,9 +522,9 @@ def githubImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.login), - self._replaceNoneWithEmptyString(contributor.name), - ';'.join([self._replaceNoneWithEmptyString(contributor.email)]) + _replaceNoneWithEmptyString(contributor.login), + _replaceNoneWithEmptyString(contributor.name), + ';'.join([_replaceNoneWithEmptyString(contributor.email)]) ]) @@ -607,8 +551,8 @@ def gitlabImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.username), - self._replaceNoneWithEmptyString(contributor.name), + _replaceNoneWithEmptyString(contributor.username), + _replaceNoneWithEmptyString(contributor.name), ';'.join(contributor.emails.list()) ]) @@ -619,8 +563,7 @@ def gitlabImplementation(self, request, session): class FileInteractionRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, 
workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class FileInteractionRoutine(OfflineRepositoryRoutine): diff --git a/src/reposcanner/dummy.py b/src/reposcanner/dummy.py index 7a5b5b5..6a77272 100644 --- a/src/reposcanner/dummy.py +++ b/src/reposcanner/dummy.py @@ -11,8 +11,7 @@ class DummyOfflineRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class DummyOfflineRoutine(OfflineRepositoryRoutine): @@ -42,21 +41,7 @@ def offlineImplementation(self, request, session): class DummyOnlineRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class DummyOnlineRoutine(OnlineRepositoryRoutine): diff --git a/src/reposcanner/git.py b/src/reposcanner/git.py index 6dce062..c8301d4 100644 --- a/src/reposcanner/git.py +++ b/src/reposcanner/git.py @@ -399,19 +399,12 @@ def __init__(self, credentialsDictionary): a dictionary object, but got a {wrongType} instead!".format( wrongType=type(credentialsDictionary))) - def safeAccess(dictionary, key): - """A convenience function for error-free access to a dictionary""" - if key in dictionary: - return dictionary[key] - else: - return None - for entryName in credentialsDictionary: entry = credentialsDictionary[entryName] - url = safeAccess(entry, "url") - username = safeAccess(entry, "username") - password = safeAccess(entry, "password") - token = safeAccess(entry, "token") + url = entry.get("url", None) + username = entry.get("username", None) + password = entry.get("password", None) + token = entry.get("token", None) if url is None: print("Reposcanner: Warning, the entry {entryName} in \ the credentials 
file is missing a URL. Skipping.".format( diff --git a/src/reposcanner/util.py b/src/reposcanner/util.py new file mode 100644 index 0000000..b38d554 --- /dev/null +++ b/src/reposcanner/util.py @@ -0,0 +1,5 @@ +def replaceNoneWithEmptyString(value): + if value is None: + return "" + else: + return value From 00250ec2522d0a13e886b7d809467f67303bf740 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 13:18:58 -0500 Subject: [PATCH 05/12] Reduce code --- src/reposcanner/contrib.py | 79 ++++++-------------------------------- src/reposcanner/dummy.py | 19 +-------- src/reposcanner/git.py | 15 ++------ src/util.py | 5 +++ 4 files changed, 22 insertions(+), 96 deletions(-) create mode 100644 src/util.py diff --git a/src/reposcanner/contrib.py b/src/reposcanner/contrib.py index 5417ad1..7bc9748 100644 --- a/src/reposcanner/contrib.py +++ b/src/reposcanner/contrib.py @@ -4,6 +4,7 @@ from reposcanner.response import ResponseFactory from reposcanner.provenance import ReposcannerRunInformant from reposcanner.data import DataEntityFactory +from reposcanner.util import replaceNoneWithEmptyString as _replaceNoneWithEmptyString import pygit2 from pathlib import Path @@ -21,8 +22,7 @@ class CommitInfoMiningRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class CommitInfoMiningRoutine(OfflineRepositoryRoutine): @@ -126,16 +126,8 @@ def _getStats(commit): changes['files'] += diff.stats.files_changed return changes - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - for commit in session.walk(session.head.target, pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_TOPOLOGICAL): - extractedCommitData = {} - # The person who originally made the change and when they made it, a # pygit2.Signature. 
author = commit.author @@ -200,21 +192,7 @@ def _replaceNoneWithEmptyString(value): class OnlineCommitAuthorshipRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): @@ -226,13 +204,6 @@ class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): def getRequestType(self): return OnlineCommitAuthorshipRoutineRequest - def githubImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -270,12 +241,6 @@ def _replaceNoneWithEmptyString(value): message="OnlineCommitAuthorshipRoutine completed!", attachments=output) def gitlabImplementation(self, request, session): - def _replaceNoneWithEmptyString(value): - if value is None: - return "" - else: - return value - factory = DataEntityFactory() output = factory.createAnnotatedCSVData( "{outputDirectory}/{repoName}_OnlineCommitAuthorship.csv".format( @@ -479,8 +444,7 @@ def execute(self, request): class OfflineCommitCountsRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class OfflineCommitCountsRoutine(OfflineRepositoryRoutine): @@ -527,21 +491,7 @@ def offlineImplementation(self, request, session): class ContributorAccountListRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - 
username=username, - password=password, - token=token, - keychain=keychain) + pass class ContributorAccountListRoutine(OnlineRepositoryRoutine): @@ -554,12 +504,6 @@ class ContributorAccountListRoutine(OnlineRepositoryRoutine): def getRequestType(self): return ContributorAccountListRoutineRequest - def _replaceNoneWithEmptyString(self, value): - if value is None: - return "" - else: - return value - def githubImplementation(self, request, session): factory = DataEntityFactory() output = factory.createAnnotatedCSVData( @@ -578,9 +522,9 @@ def githubImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.login), - self._replaceNoneWithEmptyString(contributor.name), - ';'.join([self._replaceNoneWithEmptyString(contributor.email)]) + _replaceNoneWithEmptyString(contributor.login), + _replaceNoneWithEmptyString(contributor.name), + ';'.join([_replaceNoneWithEmptyString(contributor.email)]) ]) @@ -607,8 +551,8 @@ def gitlabImplementation(self, request, session): contributors = [contributor for contributor in session.get_contributors()] for contributor in contributors: output.addRecord([ - self._replaceNoneWithEmptyString(contributor.username), - self._replaceNoneWithEmptyString(contributor.name), + _replaceNoneWithEmptyString(contributor.username), + _replaceNoneWithEmptyString(contributor.name), ';'.join(contributor.emails.list()) ]) @@ -619,8 +563,7 @@ def gitlabImplementation(self, request, session): class FileInteractionRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class FileInteractionRoutine(OfflineRepositoryRoutine): diff --git a/src/reposcanner/dummy.py b/src/reposcanner/dummy.py index 7a5b5b5..6a77272 100644 --- a/src/reposcanner/dummy.py +++ 
b/src/reposcanner/dummy.py @@ -11,8 +11,7 @@ class DummyOfflineRoutineRequest(OfflineRoutineRequest): - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): - super().__init__(repositoryURL, outputDirectory, workspaceDirectory) + pass class DummyOfflineRoutine(OfflineRepositoryRoutine): @@ -42,21 +41,7 @@ def offlineImplementation(self, request, session): class DummyOnlineRoutineRequest(OnlineRoutineRequest): - def __init__( - self, - repositoryURL, - outputDirectory, - username=None, - password=None, - token=None, - keychain=None): - super().__init__( - repositoryURL, - outputDirectory, - username=username, - password=password, - token=token, - keychain=keychain) + pass class DummyOnlineRoutine(OnlineRepositoryRoutine): diff --git a/src/reposcanner/git.py b/src/reposcanner/git.py index 6dce062..c8301d4 100644 --- a/src/reposcanner/git.py +++ b/src/reposcanner/git.py @@ -399,19 +399,12 @@ def __init__(self, credentialsDictionary): a dictionary object, but got a {wrongType} instead!".format( wrongType=type(credentialsDictionary))) - def safeAccess(dictionary, key): - """A convenience function for error-free access to a dictionary""" - if key in dictionary: - return dictionary[key] - else: - return None - for entryName in credentialsDictionary: entry = credentialsDictionary[entryName] - url = safeAccess(entry, "url") - username = safeAccess(entry, "username") - password = safeAccess(entry, "password") - token = safeAccess(entry, "token") + url = entry.get("url", None) + username = entry.get("username", None) + password = entry.get("password", None) + token = entry.get("token", None) if url is None: print("Reposcanner: Warning, the entry {entryName} in \ the credentials file is missing a URL. 
Skipping.".format( diff --git a/src/util.py b/src/util.py new file mode 100644 index 0000000..b38d554 --- /dev/null +++ b/src/util.py @@ -0,0 +1,5 @@ +def replaceNoneWithEmptyString(value): + if value is None: + return "" + else: + return value From 1f0666a9f138882eda33395249752e16cd85371a Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Mon, 12 Jun 2023 13:31:00 -0500 Subject: [PATCH 06/12] Fix GitHub CI --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8473dc1..662bc31 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip python -m pip install flake8 pytest pytest-cov python -m pip install --upgrade setuptools setuptools_scm wheel - python setup.py install + python -m pip install . #- name: Lint with flake8 # run: | # # stop the build if there are Python syntax errors or undefined names From 008ad1403c52423d4de67027f5350078e23c745c Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 13 Jun 2023 16:58:19 -0500 Subject: [PATCH 07/12] Use reflection to allow new routines externally --- src/reposcanner/manager.py | 135 +++++++++++++++++++++++-------------- tests/test_manager.py | 16 ++++- 2 files changed, 97 insertions(+), 54 deletions(-) diff --git a/src/reposcanner/manager.py b/src/reposcanner/manager.py index ea108b4..c01758a 100644 --- a/src/reposcanner/manager.py +++ b/src/reposcanner/manager.py @@ -1,10 +1,10 @@ -from reposcanner.contrib import ContributorAccountListRoutine, OfflineCommitCountsRoutine, GambitCommitAuthorshipInferenceAnalysis -from reposcanner.contrib import CommitInfoMiningRoutine, OnlineCommitAuthorshipRoutine -from reposcanner.dummy import DummyOfflineRoutine, DummyOnlineRoutine, DummyAnalysis from reposcanner.git import CredentialKeychain from reposcanner.data import DataEntityStore from 
reposcanner.response import ResponseFactory from reposcanner.routines import RepositoryRoutine, ExternalCommandLineToolRoutine +from reposcanner.analyses import DataAnalysis +import warnings +import importlib import datetime import logging import curses @@ -209,57 +209,90 @@ def __init__( self._guiModeEnabled = gui self._store = DataEntityStore() + @staticmethod + def dynamicallyImportFrom(name): + if ":" not in name: + warnings.warn( + "Unqualified routine names ({}) are deprecated. " + "Use . or .:." + .format(name), + DeprecationWarning, + ) + import reposcanner.contrib, reposcanner.dummy + if hasattr(reposcanner.contrib, name): + return getattr(reposcanner.contrib, name) + elif hasattr(reposcanner.dummy, name): + return getattr(reposcanner.dummy, name) + elif name in globals(): + return globals()[name] + else: + raise ValueError( + "{} not found in the default search locations." + .format(name) + ) + else: + importName, _, objectName = name.partition(":") + module = importlib.import_module(importName) + return getattr(module, objectName) + + def initializeRoutinesAndAnalyses(self, configData): """Constructs RepositoryRoutine and DataAnalysis objects that belong to the manager.""" - if 'routines' in configData: - for routineEntry in configData['routines']: - if isinstance(routineEntry, dict): - # The routineEntry is a dictionary, implying it - # has parameters we need to pass to the - # constructor. Otherwise it'll just be a plain string. 
- routineName = list(routineEntry.keys())[0] - configParameters = routineEntry[routineName] - else: - routineName = routineEntry - configParameters = None - try: - routineClazz = getattr(sys.modules[__name__], routineName) - routineInstance = routineClazz() - routineInstance.setConfigurationParameters(configParameters) - - if isinstance(routineInstance, RepositoryRoutine): - self._repositoryRoutines.append(routineInstance) - elif isinstance(routineInstance, ExternalCommandLineToolRoutine): - self._externalCommandLineToolRoutines.append(routineInstance) - else: - raise TypeError("ReposcannerManager does not know how to \ - handle this routine type: {routineType}".format(type(routineInstance))) - except BaseException: - raise ValueError( - "Failed to instantiate routine matching name {name}".format( - name=routineName)) - - if 'analyses' in configData: - for analysisEntry in configData['analyses']: - if isinstance(routineEntry, dict): - # The analysisEntry is a dictionary, implying it - # has parameters we need to pass to the - # constructor. Otherwise it'll just be a plain string. - analysisName = list(analysisEntry.keys())[0] - configParameters = analysisEntry[analysisName] - else: - analysisName = analysisEntry - configParameters = None - try: - analysisClazz = getattr(sys.modules[__name__], analysisName) - analysisInstance = analysisClazz() - analysisInstance.setConfigurationParameters(configParameters) - self._analyses.append(analysisInstance) - except BaseException: - raise ValueError( - "Failed to instantiate analysis matching name {name}".format( - name=analysisName)) + for routineEntry in configData.get('routines', []): + if isinstance(routineEntry, dict): + # The routineEntry is a dictionary, implying it + # has parameters we need to pass to the + # constructor. Otherwise it'll just be a plain string. 
+ routineName = list(routineEntry.keys())[0] + configParameters = routineEntry[routineName] + elif isinstance(routineEntry, str): + routineName = routineEntry + configParameters = None + else: + raise TypeError("Invalid routine: {} ({})" + .format(routineEntry, type(routineEntry))) + + routineClazz = self.dynamicallyImportFrom(routineName) + routineInstance = routineClazz() + routineInstance.setConfigurationParameters(configParameters) + + if isinstance(routineInstance, RepositoryRoutine): + self._repositoryRoutines.append(routineInstance) + elif isinstance(routineInstance, ExternalCommandLineToolRoutine): + self._externalCommandLineToolRoutines.append(routineInstance) + else: + raise TypeError( + "ReposcannerManager does not know how to handle this " + "routine type: {}" + .format(type(routineInstance)) + ) + + for analysisEntry in configData.get('analyses', []): + if isinstance(analysisEntry, dict): + # The analysisEntry is a dictionary, implying it + # has parameters we need to pass to the + # constructor. Otherwise it'll just be a plain string. 
+ analysisName = list(analysisEntry.keys())[0] + configParameters = analysisEntry[analysisName] + elif isinstance(analysisEntry, str): + analysisName = analysisEntry + configParameters = None + else: + raise TypeError("Invalid analysis: {} ({})" + .format(analysisName, type(analysisName))) + analysisClazz = self.dynamicallyImportFrom(analysisName) + analysisInstance = analysisClazz() + analysisInstance.setConfigurationParameters(configParameters) + + if isinstance(analysisInstance, DataAnalysis): + self._analyses.append(analysisInstance) + else: + raise TypeError( + "ReposcannerManager does not know how to handle this " + "analysis type: {}" + .format(type(analysisInstance)) + ) for routine in self._repositoryRoutines: if self._notebook is not None: diff --git a/tests/test_manager.py b/tests/test_manager.py index e4fa9e6..52a7e13 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -84,6 +84,10 @@ def test_ReposcannerManager_CanParseConfigYAMLFileAndConstructRoutines(tmpdir): contents = """ routines: - ContributorAccountListRoutine + - reposcanner.contrib:ContributorAccountListRoutine + analyses: + - GambitCommitAuthorshipInferenceAnalysis + - reposcanner.contrib:GambitCommitAuthorshipInferenceAnalysis """ outfile.write(contents) @@ -91,10 +95,16 @@ def test_ReposcannerManager_CanParseConfigYAMLFileAndConstructRoutines(tmpdir): configEntity.readFromFile() configDict = configEntity.getData() - manager.initializeRoutinesAndAnalyses(configDict) + with pytest.deprecated_call(): + manager.initializeRoutinesAndAnalyses(configDict) routines = manager.getRoutines() - assert(len(routines) == 1) + assert(len(routines) == 2) assert(routines[0].__class__.__name__ == "ContributorAccountListRoutine") + assert(routines[1].__class__.__name__ == "ContributorAccountListRoutine") + analyses = manager.getAnalyses() + assert(len(analyses) == 2) + assert(analyses[0].__class__.__name__ == "GambitCommitAuthorshipInferenceAnalysis") + assert(analyses[1].__class__.__name__ == 
"GambitCommitAuthorshipInferenceAnalysis") def test_ReposcannerManager_missingRoutinesInConfigCausesValueError(tmpdir): @@ -119,5 +129,5 @@ def test_ReposcannerManager_missingRoutinesInConfigCausesValueError(tmpdir): configDict = configEntity.getData() # Attempting to find and initialize NonexistentRoutine will trigger a ValueError. - with pytest.raises(ValueError): + with pytest.raises(ValueError), pytest.deprecated_call(): manager.initializeRoutinesAndAnalyses(configDict) From 6730b6cf02ae0704b9d73c1031df576e25e6494e Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 13 Jun 2023 16:58:30 -0500 Subject: [PATCH 08/12] Add documentation --- README.md | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a8538fc..0e8450f 100644 --- a/README.md +++ b/README.md @@ -48,14 +48,42 @@ reposcanner --credentials tutorial/inputs/credentials.yml --config tutorial/inpu 3. examine the output files written to `tutorial/outputs` - # How to extend functionality -1. Create a new source file, `src/reposcanner/`, including a class - based on the `ContributorAccountListRoutine`. See `stars.py` as an - example of the kind of modifications required. +In the early days, the only way to extend Reposcanner was to modify its source, but now Reposcanner can be extended externally as well. We recommend the external method for future projects, so we don't create tons of forks of Reposcanner for each new analysis. + +1. Create a new source file, `my_module.py` or `my_package/my_module.py`. + +2. Import `reposcanner.requests` and one of {`reposcanner.routine` or `reposcanner.analysis`}, depending on if you want to write a routine or an analysis. + +3. Locate the most relevant subclass of `reposcanner.requests.BaseRequestModel` and one of {`reposcanner.routines.DataMiningRoutine` or `reposcanner.analyses.DataAnalysis`}. 
E.g., for a routine that requires GitHub API access, one would subclass `OnlineRoutineRequest` and `OnlineRepositoryRoutine`. Reference the minimal blank example in `reposcanner.dummy` or real-world examples in `reposcanner.contrib`. + +4. Write a config file that refers to your routines and analyses. See the next section on configuration files. + +5. Check that `my_module` or `my_package.my_module` is importable. E.g., `python -c 'from my_module import MyRoutineOrAnalysis'`. + - The current working directory is implicitly in the `PYTHONPATH`, so your module or package will be importable if you run Python and Reposcanner from the directory which contains your module or package. + - If your module or package does not reside in the current working directory, you need to add it to your `$PYTHONPATH` for it to be importable: `export PYTHONPATH=/path/to/proj:$PYTHONPATH`. This only has to be done once for your entire shell session. Note that the `$PYTHONPATH` should have the path to the directory containing your module or package, not the path to your module or package itself. E.g., in the previous example, if you have `/path/to/proj/my_module.py` or `/path/to/proj/my_package/my_module.py`, set the `PYTHONPATH` to `/path/to/proj`. -2. Add the new class name (for example `- StarGazersRoutine`) to the end of `config.yml`. +6. Run Reposcanner. -3. Run the test scan and inspect output to ensure your scan worked as intended. +# Input files + +## config.yaml + +The config file contains a list of routines and a list of analyses. Each routine or analysis is identified as `my_module:ClassName` or `my_package.my_module:ClassName`. + +Within each routine, one can put a dictionary of keyword parameters that will get passed to that routine.
+ +``` +routines: + - my_module:NameOfOneRoutine + - my_module:NameOfAnotherOneRoutine: + arg0: "foo" + arg1: [1, 2, 3] +analyses: + - my_module:NameOfOneRoutine + - my_module:NameOfAnotherOneRoutine: + arg0: "foo" + arg1: [1, 2, 3] +``` From 35a8e4120f5c4fe46b07d14928bd08af84d27502 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 15 Jun 2023 14:47:14 -0500 Subject: [PATCH 09/12] Clean up after tests (no spurious files) --- tests/test_provenance.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_provenance.py b/tests/test_provenance.py index 83a2358..214befe 100644 --- a/tests/test_provenance.py +++ b/tests/test_provenance.py @@ -1,3 +1,4 @@ +import pathlib import pytest import datetime import reposcanner.provenance as provenance @@ -135,10 +136,11 @@ def test_ReposcannerLabNotebook_canLogStartOfTask(): def test_ReposcannerLabNotebook_canLogCompletionOfTask(tmpdir): # Overwriting methods of ContributorAccountListRoutine to return a # predetermined response.
+ path = pathlib.Path("loggedentitytest.csv") def generateCSVDataFile(tmpdir): informant = provenance.ReposcannerRunInformant() - dataEntity = dataEntities.AnnotatedCSVData("loggedentitytest.csv") + dataEntity = dataEntities.AnnotatedCSVData(str(path)) timestamp = datetime.date.today() columnNames = ["Login Name", "Actual Name", "Email(s)"] @@ -163,7 +165,7 @@ def executeGeneratesResponse(self, request): factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) factory = dataEntities.DataEntityFactory() - csvDataEntity = factory.createAnnotatedCSVData(filePath="loggedentitytest.csv") + csvDataEntity = factory.createAnnotatedCSVData(filePath=str(path)) csvDataEntity.readFromFile() response.addAttachment(csvDataEntity) return response @@ -190,6 +192,9 @@ def executeGeneratesResponse(self, request): store=dataEntities.DataEntityStore(), notebook=notebook) + assert path.exists() + path.unlink() # clean up after this test + jsonDocument = notebook.getJSONRepresentation() taskID = list(jsonDocument['activity'].keys())[0] @@ -203,7 +208,7 @@ def executeGeneratesResponse(self, request): dataEntityID = None for entityID in jsonDocument['entity']: dataEntity = jsonDocument['entity'][entityID] - if dataEntity['rs:path'] == 'loggedentitytest.csv': + if dataEntity['rs:path'] == str(path): dataEntityID = entityID assert(dataEntityID is not None) @@ -265,10 +270,11 @@ def executeGeneratesResponse(self, request): def test_ReposcannerLabNotebook_canPublishResults(tmpdir): + path = pathlib.Path("loggedentitytest.csv") def generateCSVDataFile(): informant = provenance.ReposcannerRunInformant() - dataEntity = dataEntities.AnnotatedCSVData("loggedentitytest.csv") + dataEntity = dataEntities.AnnotatedCSVData(str(path)) timestamp = datetime.date.today() columnNames = ["Login Name", "Actual Name", "Email(s)"] @@ -296,7 +302,7 @@ def executeGeneratesResponse(self, request): def exportAddsAnAttachment(self, request, response): factory = 
dataEntities.DataEntityFactory() - csvDataEntity = factory.createAnnotatedCSVData(filePath="loggedentitytest.csv") + csvDataEntity = factory.createAnnotatedCSVData(filePath=path.name) csvDataEntity.readFromFile() response.addAttachment(csvDataEntity) contributionRoutines.ContributorAccountListRoutine.execute = executeGeneratesResponse @@ -323,4 +329,7 @@ def exportAddsAnAttachment(self, request, response): store=dataEntities.DataEntityStore(), notebook=notebook) + assert path.exists() + path.unlink() # clean up after this test + notebook.publishNotebook() From 94f96b67546f6457f4dda2ad4d4ce420762b0c51 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Tue, 13 Jun 2023 15:19:11 -0500 Subject: [PATCH 10/12] Replace boolean methods with isinstance --- src/reposcanner/contrib.py | 7 +---- src/reposcanner/manager.py | 48 ++++++++++++++++++----------------- src/reposcanner/provenance.py | 7 ++--- src/reposcanner/requests.py | 45 -------------------------------- src/reposcanner/routines.py | 9 ++++--- src/{ => reposcanner}/util.py | 0 tests/test_baseRoutines.py | 14 +++++----- tests/test_requests.py | 20 --------------- 8 files changed, 42 insertions(+), 108 deletions(-) rename src/{ => reposcanner}/util.py (100%) diff --git a/src/reposcanner/contrib.py b/src/reposcanner/contrib.py index 7bc9748..fe50cc3 100644 --- a/src/reposcanner/contrib.py +++ b/src/reposcanner/contrib.py @@ -31,9 +31,6 @@ class CommitInfoMiningRoutine(OfflineRepositoryRoutine): authorship information, the commit message, and which files were interacted with. 
""" - def getRequestType(self): - return CommitInfoMiningRoutineRequest - def offlineImplementation(self, request, session): factory = DataEntityFactory() @@ -335,10 +332,8 @@ def __init__(self): try: import gambit except ImportError: - self.gambitIsAvailable = False self.gambitImportRef = None else: - self.gambitIsAvailable = True self.gambitImportRef = gambit def getRequestType(self): @@ -350,7 +345,7 @@ def getRequestType(self): def execute(self, request): responseFactory = ResponseFactory() - if not self.gambitIsAvailable: + if not self.gambitImportRef is not None: return responseFactory.createFailureResponse(message="Gambit is not \ installed, halting execution.") diff --git a/src/reposcanner/manager.py b/src/reposcanner/manager.py index c01758a..d604665 100644 --- a/src/reposcanner/manager.py +++ b/src/reposcanner/manager.py @@ -1,6 +1,7 @@ from reposcanner.git import CredentialKeychain from reposcanner.data import DataEntityStore from reposcanner.response import ResponseFactory +from reposcanner.requests import AnalysisRequestModel, ExternalCommandLineToolRoutineRequest, OnlineRoutineRequest, RepositoryRoutineRequestModel from reposcanner.routines import RepositoryRoutine, ExternalCommandLineToolRoutine from reposcanner.analyses import DataAnalysis import warnings @@ -63,7 +64,7 @@ def process(self, agents, store, notebook): if selectedAgent is not None: if notebook is not None: notebook.onTaskStart(self, store, selectedAgent) - if self._request.isAnalysisRequestType(): + if isinstance(self._request, AnalysisRequestModel): self._request.fetchDataFromStore(store) self._response = selectedAgent.run(self._request) if notebook is not None: @@ -73,7 +74,7 @@ def process(self, agents, store, notebook): self._response = responseFactory.createFailureResponse( message="No routine/analysis was found that could \ execute the request ({requestType}).".format( - requestType=type(request))) + requestType=type(self._request))) @abstractmethod def 
getResponseDescription(self): @@ -348,27 +349,28 @@ def isGUIModeEnabled(self): def buildTask(self, projectID, projectName, url, routineOrAnalysis): """Constructs a task to hold a request/response pair.""" requestType = routineOrAnalysis.getRequestType() - if requestType.isRoutineRequestType(): - if requestType.isExternalCommandLineToolRequestType(): - request = requestType(outputDirectory=self._outputDirectory) - task = ManagerExternalCommandLineToolTask(request) - return task - else: - if requestType.requiresOnlineAPIAccess(): - request = requestType(repositoryURL=url, - outputDirectory=self._outputDirectory, - keychain=self._keychain) - else: - request = requestType(repositoryURL=url, - outputDirectory=self._outputDirectory, - workspaceDirectory=self._workspaceDirectory) - task = ManagerRepositoryRoutineTask( - projectID=projectID, projectName=projectName, url=url, request=request) - return task - elif requestType.isAnalysisRequestType(): - request = requestType() - task = ManagerAnalysisTask(request) - return task + if issubclass(requestType, ExternalCommandLineToolRoutineRequest): + cmd_request = requestType(outputDirectory=self._outputDirectory) + cmd_task = ManagerExternalCommandLineToolTask(cmd_request) + return cmd_task + elif issubclass(requestType, OnlineRoutineRequest): + online_request = requestType(repositoryURL=url, + outputDirectory=self._outputDirectory, + keychain=self._keychain) + online_task = ManagerRepositoryRoutineTask( + projectID=projectID, projectName=projectName, url=url, request=online_request) + return online_task + elif issubclass(requestType, RepositoryRoutineRequestModel): + repo_request = requestType(repositoryURL=url, + outputDirectory=self._outputDirectory, + workspaceDirectory=self._workspaceDirectory) + repo_task = ManagerRepositoryRoutineTask( + projectID=projectID, projectName=projectName, url=url, request=repo_request) + return repo_task + elif issubclass(requestType, AnalysisRequestModel): + analysis_request = requestType() 
+ analysis_task = ManagerAnalysisTask(analysis_request) + return analysis_task else: raise TypeError( "Encountered unrecognized request type when building task: {requestType}.".format( diff --git a/src/reposcanner/provenance.py b/src/reposcanner/provenance.py index 2a22c43..7d0547c 100644 --- a/src/reposcanner/provenance.py +++ b/src/reposcanner/provenance.py @@ -30,6 +30,8 @@ import subprocess import os import reposcanner.data as dataEntities +from reposcanner.manager import ManagerRepositoryRoutineTask +from reposcanner.requests import AnalysisRequestModel """ trungdong/prov, a W3C-compliant provenance Data Model @@ -265,8 +267,7 @@ def onTaskCreation(self, task): task: The ManagerTask object. """ - request = task.getRequest() - if request.isRoutineRequestType(): + if isinstance(task, ManagerRepositoryRoutineTask): task = self._document.activity("rs:task{taskid}".format(taskid=id(task)), other_attributes=( ('rs:requestType', task.getRequestClassName()), ('rs:projectID', task.getProjectID()), @@ -300,7 +301,7 @@ def onTaskStart(self, task, store, agent): # If the request is an analysis request, we can probe the request to see which # files it intends to grab from the data store. 
request = task.getRequest() - if request.isAnalysisRequestType(): + if isinstance(request, AnalysisRequestModel): filesToBeUsedInAnalysis = store.getByCriteria(request.getDataCriteria()) for entity in filesToBeUsedInAnalysis: entityID = None diff --git a/src/reposcanner/requests.py b/src/reposcanner/requests.py index 59503bc..18cea45 100644 --- a/src/reposcanner/requests.py +++ b/src/reposcanner/requests.py @@ -23,21 +23,8 @@ def hasErrors(self): def getErrors(self): return self._errors - @classmethod - def isRoutineRequestType(cls): - return False - - @classmethod - def isAnalysisRequestType(cls): - return False - class AnalysisRequestModel(BaseRequestModel): - - @classmethod - def isAnalysisRequestType(cls): - return True - def __init__(self, outputDirectory="./"): super().__init__() self._data = [] @@ -145,14 +132,6 @@ def __init__(self, outputDirectory): def getOutputDirectory(self): return self._outputDirectory - @classmethod - def isRoutineRequestType(cls): - return True - - @classmethod - def isExternalCommandLineToolRequestType(cls): - return True - class RepositoryRoutineRequestModel(BaseRequestModel): """ @@ -213,14 +192,6 @@ def getRepositoryLocation(self): def getOutputDirectory(self): return self._outputDirectory - @classmethod - def isRoutineRequestType(cls): - return True - - @classmethod - def isExternalCommandLineToolRequestType(cls): - return False - class OnlineRoutineRequest(RepositoryRoutineRequestModel): """ @@ -228,14 +199,6 @@ class OnlineRoutineRequest(RepositoryRoutineRequestModel): Request classes for OnlineRepositoryRoutine should inherit from this class. """ - @classmethod - def requiresOnlineAPIAccess(cls): - """ - Tells the caller whether this request requires access to an online - version control API. - """ - return True - def __init__( self, repositoryURL, @@ -286,14 +249,6 @@ class OfflineRoutineRequest(RepositoryRoutineRequestModel): Request classes for OfflineRepositoryRoutine should inherit from this class. 
""" - @classmethod - def requiresOnlineAPIAccess(cls): - """ - Tells the caller whether this request requires access to an online - version control API. - """ - return False - def __init__(self, repositoryURL, outputDirectory, workspaceDirectory): """ Additional Parameters: diff --git a/src/reposcanner/routines.py b/src/reposcanner/routines.py index 5c7f4f8..07c9263 100644 --- a/src/reposcanner/routines.py +++ b/src/reposcanner/routines.py @@ -4,6 +4,7 @@ import pygit2 from reposcanner.git import GitEntityFactory, RepositoryLocation from reposcanner.response import ResponseFactory +from reposcanner.requests import ExternalCommandLineToolRoutineRequest, OfflineRoutineRequest, OnlineRoutineRequest class DataMiningRoutine(ABC): @@ -112,14 +113,14 @@ def commandLineToolImplementation(self, request): def execute(self, request): responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, ExternalCommandLineToolRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): return responseFactory.createFailureResponse( message="The request had errors in it and cannot be processed.", attachments=request.getErrors()) - elif not self.isExternalToolAvailable(): + elif not isinstance(request, ExternalCommandLineToolRoutineRequest): return responseFactory.createFailureResponse( message="The command-line tool required by this routine is not available or\ is otherwise unable to be called.") @@ -144,7 +145,7 @@ def execute(self, request): overriding that methods. 
""" responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, OfflineRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): @@ -214,7 +215,7 @@ def execute(self, request): overriding those methods. """ responseFactory = ResponseFactory() - if not self.canHandleRequest(request): + if not self.canHandleRequest(request) or not isinstance(request, OnlineRoutineRequest): return responseFactory.createFailureResponse( message="The routine was passed a request of the wrong type.") elif request.hasErrors(): diff --git a/src/util.py b/src/reposcanner/util.py similarity index 100% rename from src/util.py rename to src/reposcanner/util.py diff --git a/tests/test_baseRoutines.py b/tests/test_baseRoutines.py index bf70128..9dd091f 100644 --- a/tests/test_baseRoutines.py +++ b/tests/test_baseRoutines.py @@ -136,8 +136,8 @@ def canAlwaysHandleRequest(self, request): routines.OfflineRepositoryRoutine.canHandleRequest = canAlwaysHandleRequest genericRoutine = routines.OfflineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OfflineRoutineRequest( + repositoryURL="https://github.com/owner/repo", outputDirectory="./out", workspaceDirectory="./workspace") genericRequest.addError(message="Something has gone horribly wrong.") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) @@ -161,8 +161,8 @@ def canNeverHandleRequest(self, request): routines.OnlineRepositoryRoutine.canHandleRequest = canNeverHandleRequest genericRoutine = routines.OnlineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OnlineRoutineRequest( + 
repositoryURL="https://github.com/owner/repo", outputDirectory="./out") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) assert(response.hasMessage()) @@ -179,7 +179,7 @@ def canAlwaysHandleRequest(self, request): routines.OnlineRepositoryRoutine.canHandleRequest = canAlwaysHandleRequest genericRoutine = routines.OnlineRepositoryRoutine() - genericRequest = requests.RepositoryRoutineRequestModel( + genericRequest = requests.OnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./") genericRequest.addError(message="Something has gone horribly wrong.") response = genericRoutine.run(genericRequest) @@ -203,8 +203,8 @@ def canAlwaysHandleRequest(self, request): emptyAPICreator = gitEntityFactory.createVCSAPISessionCompositeCreator() genericRoutine.sessionCreator = emptyAPICreator - genericRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") + genericRequest = requests.OnlineRoutineRequest( + repositoryURL="https://github.com/owner/repo", outputDirectory="./", username="foo", password="bar") response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) assert(response.hasMessage()) diff --git a/tests/test_requests.py b/tests/test_requests.py index 0ff1df9..50a7293 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -9,12 +9,6 @@ def test_AnalysisRequestModel_isDirectlyConstructible(): analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") -def test_AnalysisRequestModel_isAnAnalysisRequestType(): - analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") - assert(analysisRequest.isAnalysisRequestType()) - assert(not analysisRequest.isRoutineRequestType()) - - def test_AnalysisRequestModel_hasNoErrorsForValidInput(): analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") assert(not analysisRequest.hasErrors()) @@ -52,13 +46,6 @@ def 
test_ExternalCommandLineToolRoutineRequest_isDirectlyConstructible(): requests.ExternalCommandLineToolRoutineRequest(outputDirectory="./") -def test_ExternalCommandLineToolRoutineRequest_isARoutineRequestType(): - commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( - outputDirectory="./") - assert(not commandLineToolRequest.isAnalysisRequestType()) - assert(commandLineToolRequest.isRoutineRequestType()) - - def test_ExternalCommandLineToolRoutineRequest_hasNoErrorsForValidInput(): commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( outputDirectory="./") @@ -87,13 +74,6 @@ def test_RepositoryRoutineRequestModel_isDirectlyConstructible(): outputDirectory="./") -def test_AnalysisRequestModel_isARoutineRequestType(): - routineRequest = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory="./") - assert(not routineRequest.isAnalysisRequestType()) - assert(routineRequest.isRoutineRequestType()) - - def test_RepositoryRoutineRequestModel_hasNoErrorsForValidInput(): request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") From 6afd4e47ad8ad44a7bdbc4ced63d6be6d2ed7b34 Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 15 Jun 2023 16:20:40 -0500 Subject: [PATCH 11/12] Update type annotations for --strict --- .github/workflows/python-package.yml | 2 +- loggedentitytest.csv | 10 --- src/reposcanner/contrib.py | 11 ++- src/reposcanner/data.py | 18 ++--- src/reposcanner/manager.py | 4 +- src/reposcanner/provenance.py | 6 +- src/reposcanner/routines.py | 5 +- src/reposcanner/util.py | 12 ++- tests/test_baseAnalyses.py | 22 ++++-- tests/test_baseRoutines.py | 111 ++++++++++++++++----------- tests/test_data.py | 68 ++++++++-------- tests/test_dummyWorkflow.py | 34 ++++---- tests/test_gitEntities.py | 105 ++++++++++++------------- tests/test_implementedAnalyses.py | 30 ++++---- tests/test_implementedRoutines.py | 10 +-- 
tests/test_manager.py | 38 ++++----- tests/test_provenance.py | 67 ++++++++-------- tests/test_requests.py | 80 ++++++++++--------- tests/test_response.py | 20 ++--- tests/test_thirdPartyLibraries.py | 2 +- 20 files changed, 349 insertions(+), 306 deletions(-) delete mode 100644 loggedentitytest.csv diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 21a0de4..ffdfb30 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -39,7 +39,7 @@ jobs: # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Check with Mypy run: | - MYPYPATH=src python -m mypy src tests + MYPYPATH=src python -m mypy --strict src tests - name: Test with pytest env: DUMMY_GITHUBAPI_TOKEN: ${{ secrets.DUMMY_WORKFLOW_GITHUB_TOKEN }} diff --git a/loggedentitytest.csv b/loggedentitytest.csv deleted file mode 100644 index b516f0f..0000000 --- a/loggedentitytest.csv +++ /dev/null @@ -1,10 +0,0 @@ -#executionid 346611c80a3511ee91b254e1adc488c5 -#creator ContributorAccountListRoutine -#datecreated 2023-06-13 -#projectid PROJID -#projectname SciKit -#url https://github.com/scikit/scikit -#names Login Name;Actual Name;Email(s) -#datatypes str;str;str -johnsmith,John Smith,jsmith@gmail.com -alicejones,Alice Jones,alice@llnl.gov diff --git a/src/reposcanner/contrib.py b/src/reposcanner/contrib.py index 8dc103d..5d384f0 100644 --- a/src/reposcanner/contrib.py +++ b/src/reposcanner/contrib.py @@ -8,7 +8,7 @@ from reposcanner.data import DataEntityFactory, ReposcannerDataEntity, AnnotatedCSVData from reposcanner.util import replaceNoneWithEmptyString as _replaceNoneWithEmptyString from reposcanner.git import Session -from typing import Dict, Type, List +from typing import Dict, Type, List, Any import pygit2 # type: ignore from pathlib import Path @@ -25,6 +25,9 @@ ######################################## +Commit = Any # type placeholder + + class 
CommitInfoMiningRoutineRequest(OfflineRoutineRequest): pass @@ -81,7 +84,7 @@ def offlineImplementation(self, request: BaseRequestModel, session: Session) -> ["list"] + ["str"]) - def _getFilesTouched(commit) -> List[str]: + def _getFilesTouched(commit: Commit) -> List[str]: # TODO: Go back and check this method. Are we correctly interpreting the semantics of # the deltas we receive from pygit2? changes = [] @@ -110,7 +113,7 @@ def _cleanCommitMessage(s: str) -> str: # Also get rid of commas, as commas are our default delimiter. return re.sub('\\s+', ' ', s).replace(',', ' ') - def _getStats(commit) -> Dict[str, int]: + def _getStats(commit: Commit) -> Dict[str, int]: changes = {'ins': 0, 'del': 0, 'files': 0} if len(commit.parents) == 0: diff = commit.tree.diff_to_tree() @@ -202,7 +205,7 @@ class OnlineCommitAuthorshipRoutine(OnlineRepositoryRoutine): with GitHub/Gitlab/Bitbucket account information. """ - def getRequestType(self): + def getRequestType(self) -> Type[BaseRequestModel]: return OnlineCommitAuthorshipRoutineRequest def githubImplementation(self, request: BaseRequestModel, session: Session) -> ResponseModel: diff --git a/src/reposcanner/data.py b/src/reposcanner/data.py index f3bf665..92d3d60 100644 --- a/src/reposcanner/data.py +++ b/src/reposcanner/data.py @@ -10,7 +10,7 @@ import pandas as pd from pathlib import Path import datetime -from typing import List, Iterable, Dict, Callable, Optional, Any, Tuple, Union +from typing import List, Iterable, Dict, Callable, Optional, Any, Tuple, Union, cast class DataEntityStore: @@ -131,7 +131,7 @@ def setReposcannerExecutionID(self, executionid: Optional[str]) -> None: self.setMetadataAttribute("executionid", executionid) def getReposcannerExecutionID(self) -> Optional[str]: - return self.getMetadataAttribute("executionid") + return cast(Optional[str], self.getMetadataAttribute("executionid")) def setDateCreated(self, dt: Optional[datetime.date]) -> None: """ @@ -140,7 +140,7 @@ def setDateCreated(self, dt: 
Optional[datetime.date]) -> None: self.setMetadataAttribute("datecreated", dt) def getDateCreated(self) -> Optional[datetime.date]: - return self.getMetadataAttribute("datecreated") + return cast(Optional[datetime.date], self.getMetadataAttribute("datecreated")) def setCreator(self, creator: Optional[str]) -> None: """ @@ -150,7 +150,7 @@ def setCreator(self, creator: Optional[str]) -> None: self.setMetadataAttribute("creator", creator) def getCreator(self) -> Optional[str]: - return self.getMetadataAttribute("creator") + return cast(Optional[str], self.getMetadataAttribute("creator")) def fileExists(self) -> bool: return os.path.exists(self._filePath) @@ -265,7 +265,7 @@ def setProjectID(self, projectid: Optional[str]) -> None: self.setMetadataAttribute("projectid", projectid) def getProjectID(self) -> Optional[str]: - return self.getMetadataAttribute("projectid") + return cast(Optional[str], self.getMetadataAttribute("projectid")) def setProjectName(self, projectname: Optional[str]) -> None: """ @@ -275,7 +275,7 @@ def setProjectName(self, projectname: Optional[str]) -> None: self.setMetadataAttribute("projectname", projectname) def getProjectName(self) -> Optional[str]: - return self.getMetadataAttribute("projectname") + return cast(Optional[str], self.getMetadataAttribute("projectname")) def setURL(self, url: Optional[str]) -> None: """ @@ -285,10 +285,10 @@ def setURL(self, url: Optional[str]) -> None: self.setMetadataAttribute("url", url) def getURL(self) -> Optional[str]: - return self.getMetadataAttribute("url") + return cast(Optional[str], self.getMetadataAttribute("url")) def getColumnNames(self) -> List[str]: - return self.getMetadataAttribute("names") + return cast(List[str], self.getMetadataAttribute("names")) def setColumnNames(self, names: List[str]) -> None: """ @@ -298,7 +298,7 @@ def setColumnNames(self, names: List[str]) -> None: self.setMetadataAttribute("names", names) def getColumnDatatypes(self) -> List[str]: - return 
self.getMetadataAttribute("datatypes") + return cast(List[str], self.getMetadataAttribute("datatypes")) def setColumnDatatypes(self, datatypes: List[str]) -> None: """ diff --git a/src/reposcanner/manager.py b/src/reposcanner/manager.py index 63c6bcb..e757228 100644 --- a/src/reposcanner/manager.py +++ b/src/reposcanner/manager.py @@ -7,6 +7,8 @@ from reposcanner.requests import BaseRequestModel, AnalysisRequestModel, ExternalCommandLineToolRoutineRequest, RepositoryRoutineRequestModel, OnlineRoutineRequest from reposcanner.analyses import DataAnalysis import logging +import warnings +import importlib import curses import sys from typing import Sequence, Union, Optional, List, Iterable, Dict, Any, Tuple, cast, TYPE_CHECKING @@ -259,7 +261,7 @@ def dynamicallyImportFrom(name: str) -> Any: return getattr(module, objectName) - def initializeRoutinesAndAnalyses(self, configData): + def initializeRoutinesAndAnalyses(self, configData: Dict[str, Any]) -> None: """Constructs RepositoryRoutine and DataAnalysis objects that belong to the manager.""" for routineEntry in configData.get('routines', []): diff --git a/src/reposcanner/provenance.py b/src/reposcanner/provenance.py index 00d0195..d094768 100644 --- a/src/reposcanner/provenance.py +++ b/src/reposcanner/provenance.py @@ -26,6 +26,7 @@ import argparse import datetime import json +import pathlib import uuid import sys import subprocess @@ -36,6 +37,7 @@ from reposcanner.routines import DataMiningRoutine, RepositoryRoutine from reposcanner.analyses import DataAnalysis from reposcanner.manager import ManagerRepositoryRoutineTask, ManagerTask +from reposcanner.util import expect_type """ trungdong/prov, a W3C-compliant provenance Data Model @@ -176,7 +178,7 @@ class ReposcannerLabNotebook(AbstractLabNotebook): class. 
""" - def __init__(self, notebookOutputDirectory: str) -> None: + def __init__(self, notebookOutputDirectory: Union[str, pathlib.Path]) -> None: """ notebookOutputDirectory: The directory where provenance files should be stored when calling publishNotebook(). @@ -209,7 +211,7 @@ def getProvnRepresentation(self) -> str: """ Returns the underlying Prov document in PROV-N form for testing purposes. """ - return self._document.get_provn() + return expect_type(str, self._document.get_provn()) def onStartup(self, args: argparse.Namespace) -> None: """ diff --git a/src/reposcanner/routines.py b/src/reposcanner/routines.py index 2336bd6..baec2b0 100644 --- a/src/reposcanner/routines.py +++ b/src/reposcanner/routines.py @@ -126,6 +126,9 @@ def execute(self, request: BaseRequestModel) -> ResponseModel: etype=type(e)), attachments=[e]) +Remote = Any # placeholder type alias + + class OfflineRepositoryRoutine(RepositoryRoutine): """ Class that encapsulates the stages of a PyGit2-based analysis procedure operating on a clone of a repository. 
@@ -148,7 +151,7 @@ def execute(self, request: BaseRequestModel) -> ResponseModel: else: try: if not os.path.exists(request.getCloneDirectory()): - def init_remote(repo, name: str, url: str): + def init_remote(repo: Session, name: str, url: str) -> Remote: # Create the remote with a mirroring url remote = repo.remotes.create( name, url, "+refs/heads/*:refs/heads/*") diff --git a/src/reposcanner/util.py b/src/reposcanner/util.py index 1bc9c87..24ab2b1 100644 --- a/src/reposcanner/util.py +++ b/src/reposcanner/util.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, TypeVar, Type, cast def replaceNoneWithEmptyString(value: Optional[str]) -> str: @@ -6,3 +6,13 @@ def replaceNoneWithEmptyString(value: Optional[str]) -> str: return "" else: return value + + +T = TypeVar("T") + + +def expect_type(typ: Type[T], obj: object) -> T: + if isinstance(obj, typ): + return cast(T, typ) + else: + raise TypeError("{} is not an instance of {}".format(obj, typ)) diff --git a/tests/test_baseAnalyses.py b/tests/test_baseAnalyses.py index d516a8d..ed18caf 100644 --- a/tests/test_baseAnalyses.py +++ b/tests/test_baseAnalyses.py @@ -3,17 +3,24 @@ import reposcanner.analyses as analyses import reposcanner.requests as requests import reposcanner.response as responses +from typing import Any -def test_DataAnalysis_isConstructibleWithMockImplementation(mocker): +# mypy: disable-error-code="abstract,method-assign" + + +def test_DataAnalysis_isConstructibleWithMockImplementation(mocker: Any) -> None: mocker.patch.multiple(analyses.DataAnalysis, __abstractmethods__=set()) genericAnalysis = analyses.DataAnalysis() -def test_DataAnalysis_runCanReturnResponse(mocker): +def test_DataAnalysis_runCanReturnResponse(mocker: Any) -> None: mocker.patch.multiple(analyses.DataAnalysis, __abstractmethods__=set()) - def executeGeneratesResponse(self, request): + def executeGeneratesResponse( + self: analyses.DataAnalysis, + request: requests.BaseRequestModel, + ) -> 
responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse() return response @@ -24,10 +31,13 @@ def executeGeneratesResponse(self, request): assert(response.wasSuccessful()) -def test_DataAnalysis_exportCanAddAttachments(mocker): +def test_DataAnalysis_exportCanAddAttachments(mocker: Any) -> None: mocker.patch.multiple(analyses.DataAnalysis, __abstractmethods__=set()) - def executeGeneratesResponse(self, request): + def executeGeneratesResponse( + self: analyses.DataAnalysis, + request: requests.BaseRequestModel, + ) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) response.addAttachment("data") @@ -42,7 +52,7 @@ def executeGeneratesResponse(self, request): assert(len(response.getAttachments()) == 1) -def test_DataAnalysis_canSetConfigurationParameters(mocker): +def test_DataAnalysis_canSetConfigurationParameters(mocker: Any) -> None: mocker.patch.multiple(analyses.DataAnalysis, __abstractmethods__=set()) genericAnalysis = analyses.DataAnalysis() configurationParameters = {"verbose": True, "debug": False} diff --git a/tests/test_baseRoutines.py b/tests/test_baseRoutines.py index 946fa58..e45e52a 100644 --- a/tests/test_baseRoutines.py +++ b/tests/test_baseRoutines.py @@ -3,6 +3,7 @@ import reposcanner.routines as routines import reposcanner.requests as requests import reposcanner.response as responses +from typing import Any, Type # TODO: These tests use monkeypatching in a way that can affect other tests. @@ -26,31 +27,39 @@ # # Clazz remains unaffected. 
+# mypy: disable-error-code="abstract,method-assign" -def test_RepositoryRoutine_isConstructibleWithMockImplementation(mocker): + +def test_RepositoryRoutine_isConstructibleWithMockImplementation(mocker: Any) -> None: mocker.patch.multiple(routines.RepositoryRoutine, __abstractmethods__=set()) - genericRoutine = routines.RepositoryRoutine() + genericRoutine = routines.RepositoryRoutine() # type: ignore -def test_RepositoryRoutine_runCanReturnResponse(mocker): +def test_RepositoryRoutine_runCanReturnResponse(mocker: Any) -> None: mocker.patch.multiple(routines.RepositoryRoutine, __abstractmethods__=set()) - def executeGeneratesResponse(self, request): + def executeGeneratesResponse( + self: routines.RepositoryRoutine, + request: requests.RepositoryRoutineRequestModel, + ) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse() return response - routines.RepositoryRoutine.execute = executeGeneratesResponse - genericRoutine = routines.RepositoryRoutine() + routines.RepositoryRoutine.execute = executeGeneratesResponse # type: ignore + genericRoutine = routines.RepositoryRoutine() # type: ignore genericRequest = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") response = genericRoutine.run(genericRequest) assert(response.wasSuccessful()) -def test_RepositoryRoutine_exportCanAddAttachments(mocker): +def test_RepositoryRoutine_exportCanAddAttachments(mocker: Any) -> None: mocker.patch.multiple(routines.RepositoryRoutine, __abstractmethods__=set()) - def executeGeneratesResponse(self, request): + def executeGeneratesResponse( + self: routines.DataMiningRoutine, + request: requests.BaseRequestModel, + ) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) response.addAttachment("data") @@ -66,7 +75,7 @@ def executeGeneratesResponse(self, request): assert(len(response.getAttachments()) == 1) 
-def test_RepositoryRoutine_canSetConfigurationParameters(mocker): +def test_RepositoryRoutine_canSetConfigurationParameters(mocker: Any) -> None: mocker.patch.multiple(routines.RepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.RepositoryRoutine() configurationParameters = {"verbose": True, "debug": False} @@ -77,30 +86,30 @@ def test_RepositoryRoutine_canSetConfigurationParameters(mocker): assert(genericRoutine.getConfigurationParameters() == configurationParameters) -def test_ExternalCommandLineToolRoutine_isConstructibleWithMockImplementation(mocker): +def test_ExternalCommandLineToolRoutine_isConstructibleWithMockImplementation(mocker: Any) -> None: mocker.patch.multiple( routines.ExternalCommandLineToolRoutine, __abstractmethods__=set()) - genericExternalCommandLineToolRoutine = routines.ExternalCommandLineToolRoutine() + genericExternalCommandLineToolRoutine = routines.ExternalCommandLineToolRoutine() # type: ignore -def test_ExternalCommandLineToolRoutine_runCanReturnResponses(mocker): +def test_ExternalCommandLineToolRoutine_runCanReturnResponses(mocker: Any) -> None: mocker.patch.multiple( routines.ExternalCommandLineToolRoutine, __abstractmethods__=set()) - def externalToolIsAvailable(self): - return True - - def supportsGenericRequestType(self): + def supportsGenericRequestType(self: routines.DataMiningRoutine) -> Type[requests.BaseRequestModel]: return requests.ExternalCommandLineToolRoutineRequest - def implementationGeneratesResponse(self, request): + def implementationGeneratesResponse( + self: routines.ExternalCommandLineToolRoutine, + request: requests.BaseRequestModel + ) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) response.addAttachment("data") return response - routines.ExternalCommandLineToolRoutine.isExternalToolAvailable = externalToolIsAvailable + routines.ExternalCommandLineToolRoutine.getRequestType = supportsGenericRequestType 
routines.ExternalCommandLineToolRoutine.commandLineToolImplementation = implementationGeneratesResponse genericExternalCommandLineToolRoutine = routines.ExternalCommandLineToolRoutine() @@ -112,7 +121,7 @@ def implementationGeneratesResponse(self, request): assert(len(response.getAttachments()) == 1) -def test_ExternalCommandLineToolRoutine_canSetConfigurationParameters(mocker): +def test_ExternalCommandLineToolRoutine_canSetConfigurationParameters(mocker: Any) -> None: mocker.patch.multiple( routines.ExternalCommandLineToolRoutine, __abstractmethods__=set()) @@ -125,16 +134,16 @@ def test_ExternalCommandLineToolRoutine_canSetConfigurationParameters(mocker): assert(genericRoutine.getConfigurationParameters() == configurationParameters) -def test_OfflineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): +def test_OfflineRepositoryRoutine_isConstructibleWithMockImplementation(mocker: Any) -> None: mocker.patch.multiple(routines.OfflineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OfflineRepositoryRoutine() def test_OfflineRepositoryRoutine_inabilityToHandleRequestResultsInFailureResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OfflineRepositoryRoutine, __abstractmethods__=set()) - def canNeverHandleRequest(self, request): + def canNeverHandleRequest(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> bool: return False originalCanHandleRequest = routines.OfflineRepositoryRoutine.canHandleRequest routines.OfflineRepositoryRoutine.canHandleRequest = canNeverHandleRequest @@ -149,10 +158,10 @@ def canNeverHandleRequest(self, request): routines.OfflineRepositoryRoutine.canHandleRequest = originalCanHandleRequest -def test_OfflineRepositoryRoutine_errorsInRequestResultsInFailureResponse(mocker): +def test_OfflineRepositoryRoutine_errorsInRequestResultsInFailureResponse(mocker: Any) -> None: mocker.patch.multiple(routines.OfflineRepositoryRoutine, __abstractmethods__=set()) - def 
canAlwaysHandleRequest(self, request): + def canAlwaysHandleRequest(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> bool: return True originalCanHandleRequest = routines.OfflineRepositoryRoutine.canHandleRequest routines.OfflineRepositoryRoutine.canHandleRequest = canAlwaysHandleRequest @@ -168,16 +177,16 @@ def canAlwaysHandleRequest(self, request): routines.OfflineRepositoryRoutine.canHandleRequest = originalCanHandleRequest -def test_OnlineRepositoryRoutine_isConstructibleWithMockImplementation(mocker): +def test_OnlineRepositoryRoutine_isConstructibleWithMockImplementation(mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OnlineRepositoryRoutine() def test_OnlineRepositoryRoutine_inabilityToHandleRequestResultsInFailureResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) - def canNeverHandleRequest(self, request): + def canNeverHandleRequest(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> bool: return False originalCanHandleRequest = routines.OnlineRepositoryRoutine.canHandleRequest routines.OnlineRepositoryRoutine.canHandleRequest = canNeverHandleRequest @@ -192,10 +201,10 @@ def canNeverHandleRequest(self, request): routines.OnlineRepositoryRoutine.canHandleRequest = originalCanHandleRequest -def test_OnlineRepositoryRoutine_errorsInRequestResultsInFailureResponse(mocker): +def test_OnlineRepositoryRoutine_errorsInRequestResultsInFailureResponse(mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) - def canAlwaysHandleRequest(self, request): + def canAlwaysHandleRequest(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> bool: return True originalCanHandleRequest = routines.OnlineRepositoryRoutine.canHandleRequest routines.OnlineRepositoryRoutine.canHandleRequest = 
canAlwaysHandleRequest @@ -212,10 +221,10 @@ def canAlwaysHandleRequest(self, request): def test_OnlineRepositoryRoutine_inabilityOfSessionCreatorToHandleRepositoryResultsInFailureResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) - def canAlwaysHandleRequest(self, request): + def canAlwaysHandleRequest(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> bool: return True originalCanHandleRequest = routines.OnlineRepositoryRoutine.canHandleRequest routines.OnlineRepositoryRoutine.canHandleRequest = canAlwaysHandleRequest @@ -230,12 +239,12 @@ def canAlwaysHandleRequest(self, request): response = genericRoutine.run(genericRequest) assert(not response.wasSuccessful()) assert(response.hasMessage()) - print(response.getMessage(), response.getAttachments()) - assert("to handle the platform of the repository" in response.getMessage()) + message = response.getMessage() + assert(message is not None and "to handle the platform of the repository" in message) routines.OnlineRepositoryRoutine.canHandleRequest = originalCanHandleRequest -def test_OnlineRepositoryRoutine_sessionCreatorSupportsGitHub(mocker): +def test_OnlineRepositoryRoutine_sessionCreatorSupportsGitHub(mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OnlineRepositoryRoutine() sessionCreator = genericRoutine.sessionCreator @@ -245,7 +254,7 @@ def test_OnlineRepositoryRoutine_sessionCreatorSupportsGitHub(mocker): genericGitHubRequest.getRepositoryLocation())) -def test_OnlineRepositoryRoutine_sessionCreatorSupportsGitlab(mocker): +def test_OnlineRepositoryRoutine_sessionCreatorSupportsGitlab(mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OnlineRepositoryRoutine() sessionCreator = genericRoutine.sessionCreator @@ -256,33 +265,43 @@ def 
test_OnlineRepositoryRoutine_sessionCreatorSupportsGitlab(mocker): def test_OnlineRepositoryRoutine_defaultGitHubImplementationReturnsFailedResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OnlineRepositoryRoutine() - response = genericRoutine.githubImplementation(request=None, session=None) + genericGitHubRequest = requests.RepositoryRoutineRequestModel( + repositoryURL="https://github.com/owner/repo", outputDirectory="./") + response = genericRoutine.githubImplementation(request=genericGitHubRequest, session=None) assert(not response.wasSuccessful()) - assert(response.hasMessage()) - assert(response.getMessage() == "This routine has no implementation available \ + message = response.getMessage() + assert(message is not None) + assert(message == "This routine has no implementation available \ to handle a GitHub repository.") def test_OnlineRepositoryRoutine_defaultGitlabImplementationReturnsFailedResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, __abstractmethods__=set()) genericRoutine = routines.OnlineRepositoryRoutine() - response = genericRoutine.gitlabImplementation(request=None, session=None) + genericGitlabRequest = requests.RepositoryRoutineRequestModel( + repositoryURL="https://gitlab.com/owner/repo", outputDirectory="./") + response = genericRoutine.gitlabImplementation(request=genericGitlabRequest, session=None) assert(not response.wasSuccessful()) - assert(response.hasMessage()) - assert(response.getMessage() == "This routine has no implementation available \ + message = response.getMessage() + assert(message is not None) + assert(message == "This routine has no implementation available \ to handle a Gitlab repository.") def test_OnlineRepositoryRoutine_defaultBitbucketImplementationReturnsFailedResponse( - mocker): + mocker: Any) -> None: mocker.patch.multiple(routines.OnlineRepositoryRoutine, 
__abstractmethods__=set()) + genericBitbucketRequest = requests.RepositoryRoutineRequestModel( + repositoryURL="https://bitbucket.com/owner/repo", outputDirectory="./") genericRoutine = routines.OnlineRepositoryRoutine() - response = genericRoutine.bitbucketImplementation(request=None, session=None) + response = genericRoutine.bitbucketImplementation(request=genericBitbucketRequest, session=None) assert(not response.wasSuccessful()) assert(response.hasMessage()) - assert(response.getMessage() == "This routine has no implementation available \ + message = response.getMessage() + assert(message is not None) + assert(message == "This routine has no implementation available \ to handle a Bitbucket repository.") diff --git a/tests/test_data.py b/tests/test_data.py index d0acaa4..ce120a9 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -2,23 +2,24 @@ import pytest import reposcanner.data as data import datetime +import pathlib -def test_AnnotatedCSVData_isDirectlyConstructible(): +def test_AnnotatedCSVData_isDirectlyConstructible() -> None: dataEntity = data.AnnotatedCSVData("test.csv") -def test_AnnotatedCSVData_isConstructibleByFactory(): +def test_AnnotatedCSVData_isConstructibleByFactory() -> None: factory = data.DataEntityFactory() factory.createAnnotatedCSVData("test.csv") -def test_AnnotatedCSVData_canGetFilePath(): +def test_AnnotatedCSVData_canGetFilePath() -> None: dataEntity = data.AnnotatedCSVData("test.csv") assert(dataEntity.getFilePath() == Path("test.csv")) -def test_AnnotatedCSVData_canGetKeysForMetadataAfterConstruction(): +def test_AnnotatedCSVData_canGetKeysForMetadataAfterConstruction() -> None: dataEntity = data.AnnotatedCSVData("test.csv") keys = dataEntity.getAttributeKeys() assert("creator" in keys) @@ -30,12 +31,12 @@ def test_AnnotatedCSVData_canGetKeysForMetadataAfterConstruction(): assert("datatypes" in keys) -def test_AnnotatedCSVData_validationOfMetadataFailsInitially(): +def 
test_AnnotatedCSVData_validationOfMetadataFailsInitially() -> None: dataEntity = data.AnnotatedCSVData("test.csv") assert(not dataEntity.validateMetadata()) -def test_AnnotatedCSVData_canStoreAndValidateMetadata(): +def test_AnnotatedCSVData_canStoreAndValidateMetadata() -> None: dataEntity = data.AnnotatedCSVData("test.csv") timestamp = datetime.date.today() columnNames = ["contributor", "numberOfCommits"] @@ -62,12 +63,12 @@ def test_AnnotatedCSVData_canStoreAndValidateMetadata(): assert(dataEntity.validateMetadata()) -def test_AnnotatedCSVData_initiallyHasNoRecords(): +def test_AnnotatedCSVData_initiallyHasNoRecords() -> None: dataEntity = data.AnnotatedCSVData("test.csv") assert(len(dataEntity.getRawRecords()) == 0) -def test_AnnotatedCSVData_canProduceRecordDictionaries(): +def test_AnnotatedCSVData_canProduceRecordDictionaries() -> None: dataEntity = data.AnnotatedCSVData("test.csv") columnNames = ["contributor", "numberOfCommits"] dataEntity.setColumnNames(columnNames) @@ -88,7 +89,7 @@ def test_AnnotatedCSVData_canProduceRecordDictionaries(): assert(entry["numberOfCommits"] == 552) -def test_AnnotatedCSVData_canConvertToDataFrame(): +def test_AnnotatedCSVData_canConvertToDataFrame() -> None: dataEntity = data.AnnotatedCSVData("test.csv") columnNames = ["contributor", "numberOfCommits"] dataEntity.setColumnNames(columnNames) @@ -104,7 +105,7 @@ def test_AnnotatedCSVData_canConvertToDataFrame(): assert(frame["numberOfCommits"][2] == 77) -def test_AnnotatedCSVData_canConvertToDataFrameFromFileWithFirstRowHeader(): +def test_AnnotatedCSVData_canConvertToDataFrameFromFileWithFirstRowHeader() -> None: dataEntity = data.AnnotatedCSVData("test.csv") dataEntity.addRecord(["contributor", "numberOfCommits"]) dataEntity.addRecord(["johnsmith", 552]) @@ -119,9 +120,8 @@ def test_AnnotatedCSVData_canConvertToDataFrameFromFileWithFirstRowHeader(): assert(frame["numberOfCommits"][2] == 77) -def test_AnnotatedCSVData_canStoreDataToDisk(tmpdir): - sub = 
tmpdir.mkdir("datatest") - filePath = str(sub.join("csvtest.csv")) +def test_AnnotatedCSVData_canStoreDataToDisk(tmp_path: pathlib.Path) -> None: + filePath = tmp_path / "csvtest.csv" dataEntity = data.AnnotatedCSVData(filePath) timestamp = datetime.date.today() columnNames = ["contributor", "numberOfCommits"] @@ -156,23 +156,22 @@ def test_AnnotatedCSVData_canStoreDataToDisk(tmpdir): assert(dataEntityB.validateMetadata()) -def test_YAMLData_isDirectlyConstructible(): +def test_YAMLData_isDirectlyConstructible() -> None: dataEntity = data.YAMLData("test.yaml") -def test_AnnotatedYAMLData_isConstructibleByFactory(): +def test_AnnotatedYAMLData_isConstructibleByFactory() -> None: factory = data.DataEntityFactory() factory.createYAMLData("test.yaml") -def test_YAMLData_initiallyHoldsNoData(): +def test_YAMLData_initiallyHoldsNoData() -> None: dataEntity = data.YAMLData("test.yaml") assert(len(dataEntity.getData()) == 0) -def test_YAMLData_canReadDataFromDisk(tmpdir): - sub = tmpdir.mkdir("datatest") - filePath = str(sub.join("test.yaml")) +def test_YAMLData_canReadDataFromDisk(tmp_path: pathlib.Path) -> None: + filePath = tmp_path / "test.yaml" with open(filePath, 'w') as outfile: contents = """ @@ -193,9 +192,8 @@ def test_YAMLData_canReadDataFromDisk(tmpdir): assert('urls' in dataDict['ADTR02'] and len(dataDict['ADTR02']['urls']) == 3) -def test_YAMLData_canStoreDataToDisk(tmpdir): - sub = tmpdir.mkdir("datatest") - filePath = str(sub.join("test.yaml")) +def test_YAMLData_canStoreDataToDisk(tmp_path: pathlib.Path) -> None: + filePath = tmp_path / "test.yaml" dataEntity = data.YAMLData(filePath) dataDict = { 'ADTR02': { @@ -216,9 +214,10 @@ def test_YAMLData_canStoreDataToDisk(tmpdir): assert('urls' in dataDictB['ADTR02'] and len(dataDictB['ADTR02']['urls']) == 3) -def test_YAMLData_canSupportNestedParametersForMiningRoutineConfigurations(tmpdir): - sub = tmpdir.mkdir("datatest") - filePath = str(sub.join("config.yaml")) +def 
test_YAMLData_canSupportNestedParametersForMiningRoutineConfigurations( + tmp_path: pathlib.Path) -> None: + sub = tmp_path + filePath = sub / "config.yaml" with open(filePath, 'w') as outfile: contents = """ @@ -243,16 +242,16 @@ def test_YAMLData_canSupportNestedParametersForMiningRoutineConfigurations(tmpdi assert(dataDict['routines'][1]['ExternalToolRoutine']['verbose'] == False) -def test_DataEntityStore_isDirectlyConstructible(): +def test_DataEntityStore_isDirectlyConstructible() -> None: store = data.DataEntityStore() -def test_DataEntityStore_isInitiallyEmpty(): +def test_DataEntityStore_isInitiallyEmpty() -> None: store = data.DataEntityStore() assert(len(store) == 0) -def test_DataEntityStore_canInsertAndRemoveEntities(): +def test_DataEntityStore_canInsertAndRemoveEntities() -> None: store = data.DataEntityStore() entityA = data.YAMLData("repositories.yaml") @@ -275,7 +274,7 @@ def test_DataEntityStore_canInsertAndRemoveEntities(): assert(len(store) == 0) -def test_DataEntityStore_canReadOverAllEntities(): +def test_DataEntityStore_canReadOverAllEntities() -> None: store = data.DataEntityStore() for i in range(20): @@ -283,12 +282,12 @@ def test_DataEntityStore_canReadOverAllEntities(): store.insert(entity) numberOfEntitiesInStore = 0 - for entity in store.read(): + for _ in store.read(): numberOfEntitiesInStore += 1 assert(numberOfEntitiesInStore == 20) -def test_DataEntityStore_canFilterByCriteria(): +def test_DataEntityStore_canFilterByCriteria() -> None: commitCountsA = data.AnnotatedCSVData("commitcounts_a.csv") commitCountsB = data.AnnotatedCSVData("commitcounts_b.csv") commitCountsC = data.AnnotatedCSVData("commitcounts_c.csv") @@ -319,14 +318,15 @@ def test_DataEntityStore_canFilterByCriteria(): store.insert(contributorListB) store.insert(contributorListC) - def criteria_OnlyCommitCounts(entity): + def criteria_OnlyCommitCounts(entity: data.ReposcannerDataEntity) -> bool: return entity.getCreator() == "CommitCountRoutine" onlyCommitCounts = 
store.getByCriteria(criteria_OnlyCommitCounts) assert(len(onlyCommitCounts) == 3) - def criteria_OnlyAdAstraRelated(entity): - return entity.getURL() == "https://bitbucket.com/Ligre/AdAstra" + def criteria_OnlyAdAstraRelated(entity: data.ReposcannerDataEntity) -> bool: + return (isinstance(entity, data.AnnotatedCSVData) + and entity.getURL() == "https://bitbucket.com/Ligre/AdAstra") onlyAdAstraRelated = store.getByCriteria(criteria_OnlyAdAstraRelated) assert(len(onlyAdAstraRelated) == 2) diff --git a/tests/test_dummyWorkflow.py b/tests/test_dummyWorkflow.py index b7dc900..a087278 100644 --- a/tests/test_dummyWorkflow.py +++ b/tests/test_dummyWorkflow.py @@ -6,23 +6,23 @@ import pytest -def test_DummyOfflineRoutineRequest_isDirectlyConstructible(): +def test_DummyOfflineRoutineRequest_isDirectlyConstructible() -> None: request = dummy.DummyOfflineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", workspaceDirectory="./") -def test_DummyOfflineRoutine_isDirectlyConstructible(): +def test_DummyOfflineRoutine_isDirectlyConstructible() -> None: routine = dummy.DummyOfflineRoutine() -def test_DummyOfflineRoutine_hasMatchingRequestType(): +def test_DummyOfflineRoutine_hasMatchingRequestType() -> None: routine = dummy.DummyOfflineRoutine() assert(routine.getRequestType() == dummy.DummyOfflineRoutineRequest) -def test_DummyOfflineRoutine_canHandleAppropriateRequest(): +def test_DummyOfflineRoutine_canHandleAppropriateRequest() -> None: request = dummy.DummyOfflineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", @@ -32,7 +32,7 @@ def test_DummyOfflineRoutine_canHandleAppropriateRequest(): assert(routine.canHandleRequest(request)) -def test_DummyOfflineRoutine_willRejectInAppropriateRequest(): +def test_DummyOfflineRoutine_willRejectInAppropriateRequest() -> None: request = reposcanner.requests.BaseRequestModel() routine = dummy.DummyOfflineRoutine() assert(not isinstance(request, routine.getRequestType())) 
@@ -41,23 +41,23 @@ def test_DummyOfflineRoutine_willRejectInAppropriateRequest(): assert(not routine.canHandleRequest(request)) -def test_DummyOnlineRoutineRequest_isDirectlyConstructible(): +def test_DummyOnlineRoutineRequest_isDirectlyConstructible() -> None: request = dummy.DummyOnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", token="ab5571mc1") -def test_DummyOnlineRoutine_isDirectlyConstructible(): +def test_DummyOnlineRoutine_isDirectlyConstructible() -> None: routine = dummy.DummyOnlineRoutine() -def test_DummyOnlineRoutine_hasMatchingRequestType(): +def test_DummyOnlineRoutine_hasMatchingRequestType() -> None: routine = dummy.DummyOnlineRoutine() assert(routine.getRequestType() == dummy.DummyOnlineRoutineRequest) -def test_DummyOnlineRoutine_canHandleAppropriateRequest(): +def test_DummyOnlineRoutine_canHandleAppropriateRequest() -> None: request = dummy.DummyOnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", @@ -67,7 +67,7 @@ def test_DummyOnlineRoutine_canHandleAppropriateRequest(): assert(routine.canHandleRequest(request)) -def test_DummyOnlineRoutine_willRejectInAppropriateRequest(): +def test_DummyOnlineRoutine_willRejectInAppropriateRequest() -> None: request = reposcanner.requests.BaseRequestModel() routine = dummy.DummyOnlineRoutine() assert(not isinstance(request, routine.getRequestType())) @@ -76,11 +76,11 @@ def test_DummyOnlineRoutine_willRejectInAppropriateRequest(): assert(not routine.canHandleRequest(request)) -def test_DummyAnalysisRequest_isDirectlyConstructible(): +def test_DummyAnalysisRequest_isDirectlyConstructible() -> None: request = dummy.DummyAnalysisRequest() -def test_DummyAnalysisRequest_criteriaFunctionExpectsDummyRoutineData(): +def test_DummyAnalysisRequest_criteriaFunctionExpectsDummyRoutineData() -> None: request = dummy.DummyAnalysisRequest() dataEntityFactory = data.DataEntityFactory() offlineData = dataEntityFactory.createAnnotatedCSVData( 
@@ -94,10 +94,10 @@ def test_DummyAnalysisRequest_criteriaFunctionExpectsDummyRoutineData(): assert(request.criteriaFunction(offlineData) is True) assert(request.criteriaFunction(onlineData) is True) - assert(request.criteriaFunction("garbage") is False) + assert(request.criteriaFunction("garbage") is False) # type: ignore -def test_DummyAnalysisRequest_canFetchDataFromStore(): +def test_DummyAnalysisRequest_canFetchDataFromStore() -> None: store = data.DataEntityStore() dataEntityFactory = data.DataEntityFactory() offlineData = dataEntityFactory.createAnnotatedCSVData( @@ -115,18 +115,18 @@ def test_DummyAnalysisRequest_canFetchDataFromStore(): assert(len(fetchedData) == 2) -def test_DummyAnalysis_isDirectlyConstructible(): +def test_DummyAnalysis_isDirectlyConstructible() -> None: analysis = dummy.DummyAnalysis() -def test_DummyAnalysis_canHandleAppropriateRequest(): +def test_DummyAnalysis_canHandleAppropriateRequest() -> None: analysis = dummy.DummyAnalysis() assert(analysis.getRequestType() == dummy.DummyAnalysisRequest) request = dummy.DummyAnalysisRequest() assert(analysis.canHandleRequest(request)) -def test_canCompleteDummyWorkflow(): +def test_canCompleteDummyWorkflow() -> None: dataEntityFactory = data.DataEntityFactory() # Imitate passing command-line arguments. 
args = type('', (), {})() diff --git a/tests/test_gitEntities.py b/tests/test_gitEntities.py index b8bd4ac..fb518dd 100644 --- a/tests/test_gitEntities.py +++ b/tests/test_gitEntities.py @@ -1,25 +1,26 @@ import pytest import reposcanner.git as gitEntities +from typing import Dict, List -def test_RepositoryLocation_isConstructibleByFactory(): +def test_RepositoryLocation_isConstructibleByFactory() -> None: factory = gitEntities.GitEntityFactory() repositoryLocation = factory.createRepositoryLocation( url="github.com/owner/repository") -def test_RepositoryLocation_isDirectlyConstructible(): +def test_RepositoryLocation_isDirectlyConstructible() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="github.com/owner/repository") -def test_RepositoryLocation_canStoreURL(): +def test_RepositoryLocation_canStoreURL() -> None: url = "arbitrary.edu/repo/name" repositoryLocation = gitEntities.RepositoryLocation(url="arbitrary.edu/repo/name") assert(repositoryLocation.getURL() == url) -def test_RepositoryLocation_canIdentifyGitHubURLs(): +def test_RepositoryLocation_canIdentifyGitHubURLs() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/PnetCDF") assert(repositoryLocation.getVersionControlPlatform() == @@ -28,7 +29,7 @@ def test_RepositoryLocation_canIdentifyGitHubURLs(): gitEntities.RepositoryLocation.HostType.OFFICIAL) -def test_RepositoryLocation_canIdentifyGitlabURLs(): +def test_RepositoryLocation_canIdentifyGitlabURLs() -> None: repositoryLocationOfficial = gitEntities.RepositoryLocation( url="https://gitlab.com/exaalt/parsplice") assert(repositoryLocationOfficial.getVersionControlPlatform() == @@ -44,7 +45,7 @@ def test_RepositoryLocation_canIdentifyGitlabURLs(): == gitEntities.RepositoryLocation.HostType.SELFHOSTED) -def test_RepositoryLocation_canIdentifyBitbucketURLs(): +def test_RepositoryLocation_canIdentifyBitbucketURLs() -> None: repositoryLocationOfficial = gitEntities.RepositoryLocation( 
url="https://bitbucket.org/berkeleylab/picsar") assert(repositoryLocationOfficial.getVersionControlPlatform() == @@ -60,7 +61,7 @@ def test_RepositoryLocation_canIdentifyBitbucketURLs(): == gitEntities.RepositoryLocation.HostType.SELFHOSTED) -def test_RepositoryLocation_unrecognizedURLsAreUnknown(): +def test_RepositoryLocation_unrecognizedURLsAreUnknown() -> None: repositoryLocationA = gitEntities.RepositoryLocation( url="http://flash.uchicago.edu/site/flashcode/coderequest/") assert(repositoryLocationA.getVersionControlPlatform() == @@ -76,7 +77,7 @@ def test_RepositoryLocation_unrecognizedURLsAreUnknown(): gitEntities.RepositoryLocation.HostType.UNKNOWN) -def test_RepositoryLocation_expectedPlatformOverridesActualPlatform(): +def test_RepositoryLocation_expectedPlatformOverridesActualPlatform() -> None: repositoryLocationA = gitEntities.RepositoryLocation( url="https://code-int.ornl.gov/exnihilo/Exnihilo", expectedPlatform=gitEntities.RepositoryLocation.VersionControlPlatform.GITLAB) @@ -90,7 +91,7 @@ def test_RepositoryLocation_expectedPlatformOverridesActualPlatform(): gitEntities.RepositoryLocation.VersionControlPlatform.BITBUCKET) -def test_RepositoryLocation_providingPlatformButNotHostTypeMakesItUnknown(): +def test_RepositoryLocation_providingPlatformButNotHostTypeMakesItUnknown() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://code-int.ornl.gov/exnihilo/Exnihilo", expectedPlatform=gitEntities.RepositoryLocation.VersionControlPlatform.GITLAB) @@ -98,7 +99,7 @@ def test_RepositoryLocation_providingPlatformButNotHostTypeMakesItUnknown(): gitEntities.RepositoryLocation.HostType.UNKNOWN) -def test_RepositoryLocation_providingBothPlatformAndHostTypeRespectsBothChoices(): +def test_RepositoryLocation_providingBothPlatformAndHostTypeRespectsBothChoices() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://code-int.ornl.gov/exnihilo/Exnihilo", 
expectedPlatform=gitEntities.RepositoryLocation.VersionControlPlatform.GITLAB, @@ -109,7 +110,7 @@ def test_RepositoryLocation_providingBothPlatformAndHostTypeRespectsBothChoices( gitEntities.RepositoryLocation.HostType.SELFHOSTED) -def test_RepositoryLocation_providingBothOwnerAndRepositoryNameRespectsBothChoices(): +def test_RepositoryLocation_providingBothOwnerAndRepositoryNameRespectsBothChoices() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://bitbucket.hdfgroup.org/scm/hdffv/hdf5", expectedOwner="hdffv", @@ -118,49 +119,49 @@ def test_RepositoryLocation_providingBothOwnerAndRepositoryNameRespectsBothChoic assert(repositoryLocation.getRepositoryName() == "hdf5") -def test_RepositoryLocation_canParseOwnerAndNameOfGitHubRepository(): +def test_RepositoryLocation_canParseOwnerAndNameOfGitHubRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/PnetCDF") assert(repositoryLocation.getOwner() == "Parallel-NetCDF") assert(repositoryLocation.getRepositoryName() == "PnetCDF") -def test_RepositoryLocation_canParseOwnerAndNameOfOfficialGitlabRepository(): +def test_RepositoryLocation_canParseOwnerAndNameOfOfficialGitlabRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://gitlab.com/exaalt/parsplice") assert(repositoryLocation.getOwner() == "exaalt") assert(repositoryLocation.getRepositoryName() == "parsplice") -def test_RepositoryLocation_canParseOwnerAndNameOfSelfHostedGitlabRepository(): +def test_RepositoryLocation_canParseOwnerAndNameOfSelfHostedGitlabRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://xgitlab.cels.anl.gov/darshan/darshancode") assert(repositoryLocation.getOwner() == "darshan") assert(repositoryLocation.getRepositoryName() == "darshancode") -def test_RepositoryLocation_canParseOwnerAndNameOfOfficialBitbucketRepository(): +def 
test_RepositoryLocation_canParseOwnerAndNameOfOfficialBitbucketRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://bitbucket.org/berkeleylab/picsar") assert(repositoryLocation.getOwner() == "berkeleylab") assert(repositoryLocation.getRepositoryName() == "picsar") -def test_RepositoryLocation_canParseOwnerAndNameOfSelfHostedBitbucketRepository(): +def test_RepositoryLocation_canParseOwnerAndNameOfSelfHostedBitbucketRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="bitbucket.snl.gov/project/repo") assert(repositoryLocation.getOwner() == "project") assert(repositoryLocation.getRepositoryName() == "repo") -def test_RepositoryLocation_canParseOwnerAndNameOfUnknownRepository(): +def test_RepositoryLocation_canParseOwnerAndNameOfUnknownRepository() -> None: repositoryLocation = gitEntities.RepositoryLocation( url="https://code-int.ornl.gov/exnihilo/Exnihilo") assert(repositoryLocation.getOwner() == "exnihilo") assert(repositoryLocation.getRepositoryName() == "Exnihilo") -def test_RepositoryLocation_AllOrNothingForPartialMatchesOnOwnerAndRepo(): +def test_RepositoryLocation_AllOrNothingForPartialMatchesOnOwnerAndRepo() -> None: repositoryLocationA = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/") assert(repositoryLocationA.getOwner() is None) @@ -192,18 +193,18 @@ def test_RepositoryLocation_AllOrNothingForPartialMatchesOnOwnerAndRepo(): assert(repositoryLocationE.getRepositoryName() is None) -def test_VersionControlPlatformCredentials_isConstructibleByFactory(): +def test_VersionControlPlatformCredentials_isConstructibleByFactory() -> None: factory = gitEntities.GitEntityFactory() credentials = factory.createVersionControlPlatformCredentials( username="name", password="password", token="token") -def test_VersionControlPlatformCredentials_isDirectlyConstructible(): +def test_VersionControlPlatformCredentials_isDirectlyConstructible() -> None: credentials = 
gitEntities.VersionControlPlatformCredentials( username="name", password="password", token="token") -def test_VersionControlPlatformCredentials_canStoreCredentialValues(): +def test_VersionControlPlatformCredentials_canStoreCredentialValues() -> None: credentials = gitEntities.VersionControlPlatformCredentials( username="name", password="password", token="token") assert(credentials.getUsername() == "name") @@ -211,41 +212,41 @@ def test_VersionControlPlatformCredentials_canStoreCredentialValues(): assert(credentials.getToken() == "token") -def test_VersionControlPlatformCredentials_allowsUsernameAndPasswordComboInConstructor(): +def test_VersionControlPlatformCredentials_allowsUsernameAndPasswordComboInConstructor() -> None: credentials = gitEntities.VersionControlPlatformCredentials( username="name", password="password") assert(credentials.hasUsernameAndPasswordAvailable()) assert(not credentials.hasTokenAvailable()) -def test_VersionControlPlatformCredentials_allowsJustTokenInConstructor(): +def test_VersionControlPlatformCredentials_allowsJustTokenInConstructor() -> None: credentials = gitEntities.VersionControlPlatformCredentials(token="token") assert(not credentials.hasUsernameAndPasswordAvailable()) assert(credentials.hasTokenAvailable()) -def test_VersionControlPlatformCredentials_allDefaultParametersInConstructorIsError(): +def test_VersionControlPlatformCredentials_allDefaultParametersInConstructorIsError() -> None: with pytest.raises(ValueError): credentials = gitEntities.VersionControlPlatformCredentials() -def test_VersionControlPlatformCredentials_providingOnlyUsernameWithoutPasswordOrViceVersaIsError(): +def test_VersionControlPlatformCredentials_providingOnlyUsernameWithoutPasswordOrViceVersaIsError() -> None: with pytest.raises(ValueError): credentials = gitEntities.VersionControlPlatformCredentials(username="name") with pytest.raises(ValueError): credentials = gitEntities.VersionControlPlatformCredentials(password="password") -def 
test_CredentialKeychain_canConstructEmptyKeychain(): +def test_CredentialKeychain_canConstructEmptyKeychain() -> None: keychain = gitEntities.CredentialKeychain(credentialsDictionary={}) -def test_CredentialKeychain_anEmptyKeychainHasLengthOfZero(): +def test_CredentialKeychain_anEmptyKeychainHasLengthOfZero() -> None: keychain = gitEntities.CredentialKeychain(credentialsDictionary={}) assert(len(keychain) == 0) -def test_CredentialKeychain_canStoreValidCredentials(): +def test_CredentialKeychain_canStoreValidCredentials() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["githubplatform"] = entry @@ -254,14 +255,14 @@ def test_CredentialKeychain_canStoreValidCredentials(): assert(len(keychain) == 1) -def test_CredentialKeychain_credentialsMustBeInDictionary(): - credentialsDictionary = [] +def test_CredentialKeychain_credentialsMustBeInDictionary() -> None: + credentialsDictionary: List[int] = [] with pytest.raises(TypeError): keychain = gitEntities.CredentialKeychain( - credentialsDictionary=credentialsDictionary) + credentialsDictionary=credentialsDictionary) # type: ignore -def test_CredentialKeychain_thereCanOnlyBeOneCredentialObjectForEachUniqueURL(): +def test_CredentialKeychain_thereCanOnlyBeOneCredentialObjectForEachUniqueURL() -> None: credentialsDictionary = {} entryA = {"url": "https://github.com/", "token": "ab341m32"} entryB = {"url": "https://github.com/", "token": "cak13113"} @@ -272,7 +273,7 @@ def test_CredentialKeychain_thereCanOnlyBeOneCredentialObjectForEachUniqueURL(): assert(len(keychain) == 1) -def test_CredentialKeychain_canMatchRepositoryLocationWithCredentials(): +def test_CredentialKeychain_canMatchRepositoryLocationWithCredentials() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["githubplatform"] = entry @@ -289,7 +290,7 @@ def test_CredentialKeychain_canMatchRepositoryLocationWithCredentials(): 
assert(lookupResult.getToken() == "ab341m32") -def test_CredentialKeychain_canFailToFindCredentials(): +def test_CredentialKeychain_canFailToFindCredentials() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["githubplatform"] = entry @@ -303,7 +304,7 @@ def test_CredentialKeychain_canFailToFindCredentials(): assert(lookupResult is None) -def test_CredentialKeychain_ifMultipleEntriesMatchLongestIsChosen(): +def test_CredentialKeychain_ifMultipleEntriesMatchLongestIsChosen() -> None: credentialsDictionary = {} entryA = {"url": "https://github.com/", "token": "ab341m32"} entryB = {"url": "https://github.com/Parallel-NetCDF", "token": "q198krq13"} @@ -326,23 +327,23 @@ def test_CredentialKeychain_ifMultipleEntriesMatchLongestIsChosen(): assert(lookupResult.getToken() == "14l1mn8a") -def test_GitHubAPISessionCreator_isConstructibleByFactory(): +def test_GitHubAPISessionCreator_isConstructibleByFactory() -> None: factory = gitEntities.GitEntityFactory() githubCreator = factory.createGitHubAPISessionCreator() -def test_GitHubAPISessionCreator_isDirectlyConstructible(): +def test_GitHubAPISessionCreator_isDirectlyConstructible() -> None: githubCreator = gitEntities.GitHubAPISessionCreator() -def test_GitHubAPISessionCreator_canHandleAppropriateRepository(): +def test_GitHubAPISessionCreator_canHandleAppropriateRepository() -> None: githubCreator = gitEntities.GitHubAPISessionCreator() repositoryLocation = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/PnetCDF") assert(githubCreator.canHandleRepository(repositoryLocation)) -def test_GitHubAPISessionCreator_rejectsInappropriateRepositories(): +def test_GitHubAPISessionCreator_rejectsInappropriateRepositories() -> None: githubCreator = gitEntities.GitHubAPISessionCreator() repositoryLocationGitlab = gitEntities.RepositoryLocation( url="https://gitlab.com/exaalt/parsplice") @@ -354,28 +355,28 @@ def 
test_GitHubAPISessionCreator_rejectsInappropriateRepositories(): assert(not githubCreator.canHandleRepository(repositoryLocationGarbage)) -def test_VCSAPISessionCompositeCreator_isConstructibleByFactory(): +def test_VCSAPISessionCompositeCreator_isConstructibleByFactory() -> None: factory = gitEntities.GitEntityFactory() githubCreator = factory.createVCSAPISessionCompositeCreator() -def test_VCSAPISessionCompositeCreator_isDirectlyConstructible(): +def test_VCSAPISessionCompositeCreator_isDirectlyConstructible() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() -def test_VCSAPISessionCompositeCreator_InitiallyHasNoChildren(): +def test_VCSAPISessionCompositeCreator_InitiallyHasNoChildren() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() assert (compositeCreator.getNumberOfChildren() == 0) -def test_VCSAPISessionCompositeCreator_CantFulfillRequestsWithoutChildren(): +def test_VCSAPISessionCompositeCreator_CantFulfillRequestsWithoutChildren() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() repositoryLocation = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/PnetCDF") assert (compositeCreator.canHandleRepository(repositoryLocation) == False) -def test_VCSAPISessionCompositeCreator_CanStoreChildren(): +def test_VCSAPISessionCompositeCreator_CanStoreChildren() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() githubCreator = gitEntities.GitHubAPISessionCreator() compositeCreator.addChild(githubCreator) @@ -383,7 +384,7 @@ def test_VCSAPISessionCompositeCreator_CanStoreChildren(): assert(compositeCreator.getNumberOfChildren() == 1) -def test_VCSAPISessionCompositeCreator_CanRemoveChildren(): +def test_VCSAPISessionCompositeCreator_CanRemoveChildren() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() githubCreator = gitEntities.GitHubAPISessionCreator() compositeCreator.addChild(githubCreator) @@ -391,7 +392,7 @@ def 
test_VCSAPISessionCompositeCreator_CanRemoveChildren(): assert(not compositeCreator.hasChild(githubCreator)) -def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCan(): +def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCan() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() githubCreator = gitEntities.GitHubAPISessionCreator() compositeCreator.addChild(githubCreator) @@ -400,7 +401,7 @@ def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCan(): assert(compositeCreator.canHandleRepository(repositoryLocation)) -def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCant(): +def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCant() -> None: compositeCreator = gitEntities.VCSAPISessionCompositeCreator() githubCreator = gitEntities.GitHubAPISessionCreator() compositeCreator.addChild(githubCreator) @@ -409,23 +410,23 @@ def test_VCSAPISessionCompositeCreator_CanFulfillRequestIfChildCant(): assert(not compositeCreator.canHandleRepository(repositoryLocation)) -def test_GitlabAPISessionCreator_isConstructibleByFactory(): +def test_GitlabAPISessionCreator_isConstructibleByFactory() -> None: factory = gitEntities.GitEntityFactory() githubCreator = factory.createGitlabAPISessionCreator() -def test_GitlabAPISessionCreator_isDirectlyConstructible(): +def test_GitlabAPISessionCreator_isDirectlyConstructible() -> None: gitlabCreator = gitEntities.GitlabAPISessionCreator() -def test_GitlabAPISessionCreator_canHandleAppropriateRepository(): +def test_GitlabAPISessionCreator_canHandleAppropriateRepository() -> None: gitlabCreator = gitEntities.GitlabAPISessionCreator() repositoryLocation = gitEntities.RepositoryLocation( url="https://gitlab.com/exaalt/parsplice") assert(gitlabCreator.canHandleRepository(repositoryLocation)) -def test_GitlabAPISessionCreator_rejectsInappropriateRepositories(): +def test_GitlabAPISessionCreator_rejectsInappropriateRepositories() -> None: gitlabCreator = 
gitEntities.GitlabAPISessionCreator() repositoryLocationGitlab = gitEntities.RepositoryLocation( url="https://github.com/Parallel-NetCDF/PnetCDF") @@ -437,7 +438,7 @@ def test_GitlabAPISessionCreator_rejectsInappropriateRepositories(): assert(not gitlabCreator.canHandleRepository(repositoryLocationGarbage)) -def test_GitlabAPISessionCreator_usernameAndPasswordComboWillTriggerRuntimeError(): +def test_GitlabAPISessionCreator_usernameAndPasswordComboWillTriggerRuntimeError() -> None: gitlabCreator = gitEntities.GitlabAPISessionCreator() repositoryLocation = gitEntities.RepositoryLocation( url="https://gitlab.com/repo/owner") diff --git a/tests/test_implementedAnalyses.py b/tests/test_implementedAnalyses.py index 204fc45..cdca6d0 100644 --- a/tests/test_implementedAnalyses.py +++ b/tests/test_implementedAnalyses.py @@ -1,12 +1,13 @@ import reposcanner.contrib as contrib import reposcanner.data as data +import pathlib -def test_TeamSizeAndDistributionAnalysisRequest_isDirectlyConstructible(): +def test_TeamSizeAndDistributionAnalysisRequest_isDirectlyConstructible() -> None: request = contrib.TeamSizeAndDistributionAnalysisRequest() -def test_TeamSizeAndDistributionAnalysisRequest_criteriaFunctionRecognizesNecessaryFiles(): +def test_TeamSizeAndDistributionAnalysisRequest_criteriaFunctionRecognizesNecessaryFiles() -> None: request = contrib.TeamSizeAndDistributionAnalysisRequest() dataEntityFactory = data.DataEntityFactory() @@ -25,7 +26,7 @@ def test_TeamSizeAndDistributionAnalysisRequest_criteriaFunctionRecognizesNecess assert(irregularData) -def test_TeamSizeAndDistributionAnalysisRequest_fetchesDataFromStore(): +def test_TeamSizeAndDistributionAnalysisRequest_fetchesDataFromStore() -> None: request = contrib.TeamSizeAndDistributionAnalysisRequest() dataEntityFactory = data.DataEntityFactory() @@ -42,7 +43,7 @@ def test_TeamSizeAndDistributionAnalysisRequest_fetchesDataFromStore(): store.insert(fileA) store.insert(fileB) store.insert(fileC) - 
store.insert(irregularData) + store.insert(irregularData) # type: ignore assert(len(request.getData()) == 0) request.fetchDataFromStore(store) @@ -52,18 +53,18 @@ def test_TeamSizeAndDistributionAnalysisRequest_fetchesDataFromStore(): assert(fileB in dataInsideRequest) -def test_TeamSizeAndDistributionAnalysis_isDirectlyConstructible(): +def test_TeamSizeAndDistributionAnalysis_isDirectlyConstructible() -> None: analysis = contrib.TeamSizeAndDistributionAnalysis() -def test_TeamSizeAndDistributionAnalysis_requestTypeMatchesExpectedType(): +def test_TeamSizeAndDistributionAnalysis_requestTypeMatchesExpectedType() -> None: analysis = contrib.TeamSizeAndDistributionAnalysis() assert(analysis.getRequestType() == contrib.TeamSizeAndDistributionAnalysisRequest) request = contrib.TeamSizeAndDistributionAnalysisRequest() assert(analysis.canHandleRequest(request)) -def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfThereIsNoContributorDataAvailable(): +def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfThereIsNoContributorDataAvailable() -> None: request = contrib.TeamSizeAndDistributionAnalysisRequest() store = data.DataEntityStore() # Store is empty! No data available to compute the analysis! 
request.fetchDataFromStore(store) @@ -75,7 +76,7 @@ def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfThereIs assert(response.getMessage() == "Received no ContributorAccountListRoutine data.") -def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfLoginDataNotAvailable(): +def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfLoginDataNotAvailable() -> None: store = data.DataEntityStore() dataEntityFactory = data.DataEntityFactory() analysis = contrib.TeamSizeAndDistributionAnalysis() @@ -92,8 +93,8 @@ def test_TeamSizeAndDistributionAnalysis_analysisReturnsFailureResponseIfLoginDa assert(not noLoginDataResponse.wasSuccessful()) -def test_TeamSizeAndDistributionAnalysis_canHandleTestData(tmpdir): - def generateContributorAccountListFile(store): +def test_TeamSizeAndDistributionAnalysis_canHandleTestData(tmp_path: pathlib.Path) -> None: + def generateContributorAccountListFile(store: data.DataEntityStore) -> None: dataEntityFactory = data.DataEntityFactory() contributorAccountFile = dataEntityFactory.createAnnotatedCSVData( "data/contrib_account_file.csv") @@ -111,7 +112,7 @@ def generateContributorAccountListFile(store): ["jimthewizard", "Jimothy Forrest", "jforrest@lbl@gov"]) store.insert(contributorAccountFile) - def generateGithubLoginFile(store): + def generateGithubLoginFile(store: data.DataEntityStore) -> None: dataEntityFactory = data.DataEntityFactory() githubLoginFile = dataEntityFactory.createAnnotatedCSVData( "data/github_login.csv") @@ -123,7 +124,7 @@ def generateGithubLoginFile(store): githubLoginFile.addRecord([2, "jimthewizard", 138]) store.insert(githubLoginFile) - def generateECPMembersFile(store): + def generateECPMembersFile(store: data.DataEntityStore) -> None: dataEntityFactory = data.DataEntityFactory() membersFile = dataEntityFactory.createAnnotatedCSVData("data/members.csv") membersFile.setCreator("external") @@ -140,9 +141,9 @@ def generateECPMembersFile(store): 
generateGithubLoginFile(store) generateECPMembersFile(store) - mockOutputDirectory = tmpdir.mkdir("./mockoutput/") + mockOutputDirectory = tmp_path request = contrib.TeamSizeAndDistributionAnalysisRequest( - outputDirectory=mockOutputDirectory) + outputDirectory=str(mockOutputDirectory)) request.fetchDataFromStore(store) analysis = contrib.TeamSizeAndDistributionAnalysis() @@ -150,6 +151,7 @@ def generateECPMembersFile(store): assert(response.wasSuccessful()) csvOutput = response.getAttachments()[0] + assert(isinstance(csvOutput, data.AnnotatedCSVData)) assert(csvOutput.getCreator() == "TeamSizeAndDistributionAnalysis") scikitEntry = csvOutput.getRawRecords()[0] print(scikitEntry) diff --git a/tests/test_implementedRoutines.py b/tests/test_implementedRoutines.py index 0eafa53..b043f72 100644 --- a/tests/test_implementedRoutines.py +++ b/tests/test_implementedRoutines.py @@ -3,29 +3,29 @@ import reposcanner.requests -def test_ContributorAccountListRoutineRequest_isDirectlyConstructible(): +def test_ContributorAccountListRoutineRequest_isDirectlyConstructible() -> None: request = contributionRoutines.ContributorAccountListRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", token="ab5571mc1") -def test_ContributorAccountListRoutine_isDirectlyConstructible(): +def test_ContributorAccountListRoutine_isDirectlyConstructible() -> None: routine = contributionRoutines.ContributorAccountListRoutine() -def test_ContributorAccountListRoutine_hasMatchingRequestType(): +def test_ContributorAccountListRoutine_hasMatchingRequestType() -> None: routine = contributionRoutines.ContributorAccountListRoutine() assert(routine.getRequestType() == contributionRoutines.ContributorAccountListRoutineRequest) -def test_ContributorAccountListRoutine_canHandleAppropriateRequest(): +def test_ContributorAccountListRoutine_canHandleAppropriateRequest() -> None: request = contributionRoutines.ContributorAccountListRoutineRequest( 
repositoryURL="https://github.com/owner/repo", outputDirectory="./", token="ab5571mc1") routine = contributionRoutines.ContributorAccountListRoutine() assert(routine.canHandleRequest(request)) -def test_ContributorAccountListRoutine_willRejectInAppropriateRequest(): +def test_ContributorAccountListRoutine_willRejectInAppropriateRequest() -> None: request = reposcanner.requests.BaseRequestModel() routine = contributionRoutines.ContributorAccountListRoutine() diff --git a/tests/test_manager.py b/tests/test_manager.py index 52a7e13..2ae6273 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -1,10 +1,11 @@ import reposcanner.manager as management import reposcanner.requests as requests import reposcanner.data as data +import pathlib import pytest -def test_ManagerRepositoryRoutineTask_isDirectlyConstructible(): +def test_ManagerRepositoryRoutineTask_isDirectlyConstructible() -> None: task = management.ManagerRepositoryRoutineTask( projectID="PROJID", projectName="SciKit", @@ -14,7 +15,7 @@ def test_ManagerRepositoryRoutineTask_isDirectlyConstructible(): outputDirectory="./")) -def test_ManagerRepositoryRoutineTask_isConstructibleByFactory(): +def test_ManagerRepositoryRoutineTask_isConstructibleByFactory() -> None: factory = management.TaskFactory() task = factory.createManagerRepositoryRoutineTask( projectID="PROJID", @@ -25,16 +26,16 @@ def test_ManagerRepositoryRoutineTask_isConstructibleByFactory(): outputDirectory="./")) -def test_ManagerAnalysisTask_isDirectlyConstructible(): +def test_ManagerAnalysisTask_isDirectlyConstructible() -> None: task = management.ManagerAnalysisTask(request=requests.AnalysisRequestModel()) -def test_ManagerAnalysisTask_isConstructibleByFactory(): +def test_ManagerAnalysisTask_isConstructibleByFactory() -> None: factory = management.TaskFactory() task = factory.createManagerAnalysisTask(request=requests.AnalysisRequestModel()) -def test_ReposcannerManager_isDirectlyConstructible(): +def 
test_ReposcannerManager_isDirectlyConstructible() -> None: args = type('', (), {})() args.outputDirectory = "./" args.workspaceDirectory = "./" @@ -46,39 +47,31 @@ def test_ReposcannerManager_isDirectlyConstructible(): gui=args.gui) -def test_ReposcannerManager_GUIModeIsDisabledByDefault(): - manager = management.ReposcannerManager( - notebook=None, outputDirectory=None, workspaceDirectory=None) +def test_ReposcannerManager_GUIModeIsDisabledByDefault() -> None: + manager = management.ReposcannerManager(notebook=None) assert(not manager.isGUIModeEnabled()) -def test_ReposcannerManager_GUIModeCanBeEnabledAtConstructionTime(): +def test_ReposcannerManager_GUIModeCanBeEnabledAtConstructionTime() -> None: manager = management.ReposcannerManager( notebook=None, - outputDirectory=None, - workspaceDirectory=None, gui=True) assert(manager.isGUIModeEnabled()) -def test_ReposcannerManager_initiallyHasNoRoutinesOrAnalyses(): +def test_ReposcannerManager_initiallyHasNoRoutinesOrAnalyses() -> None: manager = management.ReposcannerManager( notebook=None, - outputDirectory=None, - workspaceDirectory=None, gui=True) assert(len(manager.getRoutines()) == 0) assert(len(manager.getAnalyses()) == 0) -def test_ReposcannerManager_CanParseConfigYAMLFileAndConstructRoutines(tmpdir): +def test_ReposcannerManager_CanParseConfigYAMLFileAndConstructRoutines(tmp_path: pathlib.Path) -> None: manager = management.ReposcannerManager( notebook=None, - outputDirectory=None, - workspaceDirectory=None, gui=True) - sub = tmpdir.mkdir("managertest") - filePath = str(sub.join("config.yaml")) + filePath = tmp_path / "config.yaml" with open(filePath, 'w') as outfile: contents = """ @@ -107,14 +100,11 @@ def test_ReposcannerManager_CanParseConfigYAMLFileAndConstructRoutines(tmpdir): assert(analyses[1].__class__.__name__ == "GambitCommitAuthorshipInferenceAnalysis") -def test_ReposcannerManager_missingRoutinesInConfigCausesValueError(tmpdir): +def 
test_ReposcannerManager_missingRoutinesInConfigCausesValueError(tmp_path: pathlib.Path) -> None: manager = management.ReposcannerManager( notebook=None, - outputDirectory=None, - workspaceDirectory=None, gui=True) - sub = tmpdir.mkdir("managertest") - filePath = str(sub.join("config.yaml")) + filePath = tmp_path / "config.yaml" with open(filePath, 'w') as outfile: contents = """ diff --git a/tests/test_provenance.py b/tests/test_provenance.py index 214befe..c71726e 100644 --- a/tests/test_provenance.py +++ b/tests/test_provenance.py @@ -4,15 +4,20 @@ import reposcanner.provenance as provenance import reposcanner.contrib as contributionRoutines import reposcanner.manager as manager +import reposcanner.requests as requests import reposcanner.response as responses import reposcanner.data as dataEntities +import reposcanner.routines as routines -def test_ReposcannerRunInformant_isDirectlyConstructible(): +# mypy: disable-error-code="method-assign" + + +def test_ReposcannerRunInformant_isDirectlyConstructible() -> None: informant = provenance.ReposcannerRunInformant() -def test_ReposcannerRunInformant_differentInstancesProvideTheSameExecutionID(): +def test_ReposcannerRunInformant_differentInstancesProvideTheSameExecutionID() -> None: informantA = provenance.ReposcannerRunInformant() informantB = provenance.ReposcannerRunInformant() @@ -22,7 +27,7 @@ def test_ReposcannerRunInformant_differentInstancesProvideTheSameExecutionID(): assert(executionIDA == executionIDB) -def test_ReposcannerRunInformant_differentInstancesProvideTheSameVersionInfo(): +def test_ReposcannerRunInformant_differentInstancesProvideTheSameVersionInfo() -> None: informantA = provenance.ReposcannerRunInformant() informantB = provenance.ReposcannerRunInformant() @@ -32,11 +37,11 @@ def test_ReposcannerRunInformant_differentInstancesProvideTheSameVersionInfo(): assert(versionInfoA == versionInfoB) -def test_ReposcannerLabNotebook_isDirectlyConstructible(): +def 
test_ReposcannerLabNotebook_isDirectlyConstructible() -> None: notebook = provenance.ReposcannerLabNotebook(notebookOutputDirectory="./") -def test_ReposcannerLabNotebook_canLogArgsOnStartup(): +def test_ReposcannerLabNotebook_canLogArgsOnStartup() -> None: notebook = provenance.ReposcannerLabNotebook(notebookOutputDirectory="./") args = type('', (), {})() args.repositories = "repositories.yaml" @@ -53,7 +58,7 @@ def test_ReposcannerLabNotebook_canLogArgsOnStartup(): assert(jsonDocument['entity']['rs:config']['rs:path'] == 'config.yaml') -def test_ReposcannerLabNotebook_canLogCreatedRoutines(): +def test_ReposcannerLabNotebook_canLogCreatedRoutines() -> None: routine = contributionRoutines.ContributorAccountListRoutine() notebook = provenance.ReposcannerLabNotebook(notebookOutputDirectory="./") notebook.onRoutineCreation(routine) @@ -67,12 +72,12 @@ def test_ReposcannerLabNotebook_canLogCreatedRoutines(): assert(relationExistsBetweenManagerAndRoutine) -def test_ReposcannerLabNotebook_canLogCreatedAnalyses(): +def test_ReposcannerLabNotebook_canLogCreatedAnalyses() -> None: # TODO: We need to define what analyses are before we can deal with logging them. 
pass -def test_ReposcannerLabNotebook_canLogCreatedTasks(): +def test_ReposcannerLabNotebook_canLogCreatedTasks() -> None: request = contributionRoutines.ContributorAccountListRoutineRequest( repositoryURL="https://github.com/scikit/scikit", outputDirectory="./", token="ab5571mc1") task = manager.ManagerRepositoryRoutineTask( @@ -100,7 +105,7 @@ def test_ReposcannerLabNotebook_canLogCreatedTasks(): assert(relationExistsBetweenManagerAndTask) -def test_ReposcannerLabNotebook_canLogStartOfTask(): +def test_ReposcannerLabNotebook_canLogStartOfTask() -> None: notebook = provenance.ReposcannerLabNotebook(notebookOutputDirectory="./") @@ -133,12 +138,12 @@ def test_ReposcannerLabNotebook_canLogStartOfTask(): print(jsonDocument) -def test_ReposcannerLabNotebook_canLogCompletionOfTask(tmpdir): +def test_ReposcannerLabNotebook_canLogCompletionOfTask() -> None: # Overwriting methods of ContributorAccountListRoutine to return a # predetermined response. path = pathlib.Path("loggedentitytest.csv") - def generateCSVDataFile(tmpdir): + def generateCSVDataFile() -> None: informant = provenance.ReposcannerRunInformant() dataEntity = dataEntities.AnnotatedCSVData(str(path)) timestamp = datetime.date.today() @@ -159,13 +164,13 @@ def generateCSVDataFile(tmpdir): dataEntity.addRecord(["alicejones", "Alice Jones", "alice@llnl.gov"]) dataEntity.writeToFile() - generateCSVDataFile(tmpdir) + generateCSVDataFile() - def executeGeneratesResponse(self, request): + def executeGeneratesResponse(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) - factory = dataEntities.DataEntityFactory() - csvDataEntity = factory.createAnnotatedCSVData(filePath=str(path)) + factory2 = dataEntities.DataEntityFactory() + csvDataEntity = factory2.createAnnotatedCSVData(filePath=str(path)) csvDataEntity.readFromFile() response.addAttachment(csvDataEntity) return response 
@@ -220,13 +225,15 @@ def executeGeneratesResponse(self, request): assert(relationExistsBetweenTaskAndFile) -def test_ReposcannerLabNotebook_canLogNonstandardDataDuringCompletionOfTask(tmpdir): - def executeGeneratesResponse(self, request): +def test_ReposcannerLabNotebook_canLogNonstandardDataDuringCompletionOfTask() -> None: + def executeGeneratesResponse( + self: routines.DataMiningRoutine, + request: requests.BaseRequestModel, + ) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) - factory = dataEntities.DataEntityFactory() nonstandardData = {"a": 5, "b": 255} - response.addAttachment(nonstandardData) + response.addAttachment(nonstandardData) # type: ignore return response contributionRoutines.ContributorAccountListRoutine.execute = executeGeneratesResponse @@ -269,10 +276,10 @@ def executeGeneratesResponse(self, request): assert(relationExistsBetweenTaskAndFile) -def test_ReposcannerLabNotebook_canPublishResults(tmpdir): +def test_ReposcannerLabNotebook_canPublishResults(tmp_path: pathlib.Path) -> None: path = pathlib.Path("loggedentitytest.csv") - def generateCSVDataFile(): + def generateCSVDataFile() -> None: informant = provenance.ReposcannerRunInformant() dataEntity = dataEntities.AnnotatedCSVData(str(path)) timestamp = datetime.date.today() @@ -295,20 +302,20 @@ def generateCSVDataFile(): generateCSVDataFile() - def executeGeneratesResponse(self, request): + def executeGeneratesResponse(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> responses.ResponseModel: factory = responses.ResponseFactory() response = factory.createSuccessResponse(attachments=[]) return response - - def exportAddsAnAttachment(self, request, response): - factory = dataEntities.DataEntityFactory() - csvDataEntity = factory.createAnnotatedCSVData(filePath=path.name) - csvDataEntity.readFromFile() - response.addAttachment(csvDataEntity) 
contributionRoutines.ContributorAccountListRoutine.execute = executeGeneratesResponse - contributionRoutines.ContributorAccountListRoutine.export = exportAddsAnAttachment - outputDir = tmpdir.mkdir("notebookOutput/") + # def exportAddsAnAttachment(self: routines.DataMiningRoutine, request: requests.BaseRequestModel) -> responses.ResponseModel: + # factory = dataEntities.DataEntityFactory() + # csvDataEntity = factory.createAnnotatedCSVData(filePath=path.name) + # csvDataEntity.readFromFile() + # response.addAttachment(csvDataEntity) + # contributionRoutines.ContributorAccountListRoutine.export = exportAddsAnAttachment + + outputDir = tmp_path notebook = provenance.ReposcannerLabNotebook(notebookOutputDirectory=outputDir) request = contributionRoutines.ContributorAccountListRoutineRequest( diff --git a/tests/test_requests.py b/tests/test_requests.py index 50a7293..6672d58 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -5,92 +5,92 @@ import platform -def test_AnalysisRequestModel_isDirectlyConstructible(): +def test_AnalysisRequestModel_isDirectlyConstructible() -> None: analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") -def test_AnalysisRequestModel_hasNoErrorsForValidInput(): +def test_AnalysisRequestModel_hasNoErrorsForValidInput() -> None: analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") assert(not analysisRequest.hasErrors()) -def test_AnalysisRequestModel_passingGarbageOutputDirectoryIsAnError(): - analysisRequest = requests.AnalysisRequestModel(outputDirectory=42) +def test_AnalysisRequestModel_passingGarbageOutputDirectoryIsAnError() -> None: + analysisRequest = requests.AnalysisRequestModel(outputDirectory=42) # type: ignore assert(analysisRequest.hasErrors()) -def test_AnalysisRequestModel_passingNonexistentOutputDirectoryIsAnError(): +def test_AnalysisRequestModel_passingNonexistentOutputDirectoryIsAnError() -> None: analysisRequest = requests.AnalysisRequestModel( 
outputDirectory="./nonexistentDirectory/") assert(analysisRequest.hasErrors()) -def test_AnalysisRequestModel_passingOutputDirectoryThatCannotBeWrittenToIsAnError(): +def test_AnalysisRequestModel_passingOutputDirectoryThatCannotBeWrittenToIsAnError() -> None: # This test is specific to Mac and Linux environments, so we'll skip it when running # this test in a Windows environment. if platform.system() == 'Windows': - return True + return analysisRequest = requests.AnalysisRequestModel(outputDirectory="/") assert(analysisRequest.hasErrors()) -def test_AnalysisRequestModel_defaultDataCriteriaAcceptsLiterallyEverything(): +def test_AnalysisRequestModel_defaultDataCriteriaAcceptsLiterallyEverything() -> None: analysisRequest = requests.AnalysisRequestModel(outputDirectory="./") assert(analysisRequest.getDataCriteria() == analysisRequest.criteriaFunction) - assert(analysisRequest.criteriaFunction("garbage") is True) - assert(analysisRequest.criteriaFunction(42) is True) - assert(analysisRequest.criteriaFunction(analysisRequest) is True) + assert(analysisRequest.criteriaFunction("garbage") is True) # type: ignore + assert(analysisRequest.criteriaFunction(42) is True) # type: ignore + assert(analysisRequest.criteriaFunction(analysisRequest) is True) # type: ignore -def test_ExternalCommandLineToolRoutineRequest_isDirectlyConstructible(): +def test_ExternalCommandLineToolRoutineRequest_isDirectlyConstructible() -> None: requests.ExternalCommandLineToolRoutineRequest(outputDirectory="./") -def test_ExternalCommandLineToolRoutineRequest_hasNoErrorsForValidInput(): +def test_ExternalCommandLineToolRoutineRequest_hasNoErrorsForValidInput() -> None: commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( outputDirectory="./") assert(not commandLineToolRequest.hasErrors()) -def test_ExternalCommandLineToolRoutineRequest_passingOutputDirectoryThatCannotBeWrittenToIsAnError(): +def 
test_ExternalCommandLineToolRoutineRequest_passingOutputDirectoryThatCannotBeWrittenToIsAnError() -> None: # This test is specific to Mac and Linux environments, so we'll skip it when running # this test in a Windows environment. if platform.system() == 'Windows': - return True + return commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( outputDirectory="/") assert(commandLineToolRequest.hasErrors()) -def test_ExternalCommandLineToolRoutineRequest_canStoreOutputDirectory(): +def test_ExternalCommandLineToolRoutineRequest_canStoreOutputDirectory() -> None: commandLineToolRequest = requests.ExternalCommandLineToolRoutineRequest( outputDirectory="./") assert(commandLineToolRequest.getOutputDirectory() == "./") -def test_RepositoryRoutineRequestModel_isDirectlyConstructible(): +def test_RepositoryRoutineRequestModel_isDirectlyConstructible() -> None: requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") -def test_RepositoryRoutineRequestModel_hasNoErrorsForValidInput(): +def test_RepositoryRoutineRequestModel_hasNoErrorsForValidInput() -> None: request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") assert(not request.hasErrors()) -def test_RepositoryRoutineRequestModel_passingOutputDirectoryThatCannotBeWrittenToIsAnError(): +def test_RepositoryRoutineRequestModel_passingOutputDirectoryThatCannotBeWrittenToIsAnError() -> None: # This test is specific to Mac and Linux environments, so we'll skip it when running # this test in a Windows environment. 
if platform.system() == 'Windows': - return True + return request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="/") assert(request.hasErrors()) -def test_RepositoryRoutineRequestModel_canGenerateAndStoreRepositoryLocation(): +def test_RepositoryRoutineRequestModel_canGenerateAndStoreRepositoryLocation() -> None: request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") location = request.getRepositoryLocation() @@ -100,7 +100,7 @@ def test_RepositoryRoutineRequestModel_canGenerateAndStoreRepositoryLocation(): assert(location.getRepositoryName() == "repo") -def test_RepositoryRoutineRequestModel_badURLMeansError(): +def test_RepositoryRoutineRequestModel_badURLMeansError() -> None: requestA = requests.RepositoryRoutineRequestModel( repositoryURL="garbage", outputDirectory="./") assert(requestA.hasErrors()) @@ -110,19 +110,19 @@ def test_RepositoryRoutineRequestModel_badURLMeansError(): assert(requestB.hasErrors()) requestC = requests.RepositoryRoutineRequestModel( - repositoryURL=None, outputDirectory="./") + repositoryURL=None, outputDirectory="./") # type: ignore assert(requestC.hasErrors()) -def test_RepositoryRoutineRequestModel_canStoreOutputDirectory(): +def test_RepositoryRoutineRequestModel_canStoreOutputDirectory() -> None: request = requests.RepositoryRoutineRequestModel( repositoryURL="https://github.com/owner/repo", outputDirectory="./") assert(request.getOutputDirectory() == "./") -def test_RepositoryRoutineRequestModel_badOutputDirectoryMeansError(): +def test_RepositoryRoutineRequestModel_badOutputDirectoryMeansError() -> None: requestA = requests.RepositoryRoutineRequestModel( - repositoryURL="https://github.com/owner/repo", outputDirectory=None) + repositoryURL="https://github.com/owner/repo", outputDirectory=None) # type: ignore assert(requestA.hasErrors()) requestB = requests.RepositoryRoutineRequestModel( @@ -131,13 +131,13 @@ 
def test_RepositoryRoutineRequestModel_badOutputDirectoryMeansError(): assert(requestB.hasErrors()) -def test_OnlineRoutineRequest_isDirectlyConstructible(): +def test_OnlineRoutineRequest_isDirectlyConstructible() -> None: requests.OnlineRoutineRequest(repositoryURL="https://github.com/owner/repo", outputDirectory="./", token="ab5571mc1") -def test_OnlineRoutineRequest_canPassKeychainToConstructor(): +def test_OnlineRoutineRequest_canPassKeychainToConstructor() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["platform"] = entry @@ -147,13 +147,14 @@ def test_OnlineRoutineRequest_canPassKeychainToConstructor(): keychain=keychain) -def test_OnlineRoutineRequest_canStoreValidCredentials(): +def test_OnlineRoutineRequest_canStoreValidCredentials() -> None: requestA = requests.OnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", token="ab5571mc1") assert(not requestA.hasErrors()) credentialsA = requestA.getCredentials() + assert(credentialsA is not None) assert(credentialsA.hasTokenAvailable()) assert(credentialsA.getToken() == "ab5571mc1") @@ -164,12 +165,13 @@ def test_OnlineRoutineRequest_canStoreValidCredentials(): password="luggage") assert(not requestB.hasErrors()) credentialsB = requestB.getCredentials() + assert(credentialsB is not None) assert(credentialsB.hasUsernameAndPasswordAvailable()) assert(credentialsB.getUsername() == "argyle") assert(credentialsB.getPassword() == "luggage") -def test_OnlineRoutineRequest_canStoreValidCredentialsViaKeychain(): +def test_OnlineRoutineRequest_canStoreValidCredentialsViaKeychain() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["platform"] = entry @@ -181,11 +183,12 @@ def test_OnlineRoutineRequest_canStoreValidCredentialsViaKeychain(): keychain=keychain) assert(not request.hasErrors()) credentials = request.getCredentials() + assert(credentials 
is not None) assert(credentials.hasTokenAvailable()) assert(credentials.getToken() == "ab341m32") -def test_OnlineRoutineRequest_keychainTakesPrecedenceOverOtherInputs(): +def test_OnlineRoutineRequest_keychainTakesPrecedenceOverOtherInputs() -> None: credentialsDictionary = {} entry = {"url": "https://github.com/", "token": "ab341m32"} credentialsDictionary["platform"] = entry @@ -198,11 +201,12 @@ def test_OnlineRoutineRequest_keychainTakesPrecedenceOverOtherInputs(): keychain=keychain) assert(not request.hasErrors()) credentials = request.getCredentials() + assert(credentials is not None) assert(credentials.hasTokenAvailable()) assert(credentials.getToken() == "ab341m32") -def test_OnlineRoutineRequest_doesNotSwitchToOtherCredentialsIfKeychainLacksThem(): +def test_OnlineRoutineRequest_doesNotSwitchToOtherCredentialsIfKeychainLacksThem() -> None: credentialsDictionary = {} entry = {"url": "https://gitlab.com/", "token": "ab341m32"} credentialsDictionary["platform"] = entry @@ -216,7 +220,7 @@ def test_OnlineRoutineRequest_doesNotSwitchToOtherCredentialsIfKeychainLacksThem assert(request.hasErrors()) -def test_OnlineRoutineRequest_badCredentialsMeansError(): +def test_OnlineRoutineRequest_badCredentialsMeansError() -> None: requestA = requests.OnlineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", @@ -238,13 +242,13 @@ def test_OnlineRoutineRequest_badCredentialsMeansError(): assert(requestC.hasErrors()) -def test_OfflineRoutineRequest_isDirectlyConstructible(): +def test_OfflineRoutineRequest_isDirectlyConstructible() -> None: requests.OfflineRoutineRequest(repositoryURL="https://github.com/owner/repo", outputDirectory="./", workspaceDirectory="./") -def test_OfflineRoutineRequest_canStoreValidWorkspaceDirectory(): +def test_OfflineRoutineRequest_canStoreValidWorkspaceDirectory() -> None: request = requests.OfflineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", @@ -253,7 +257,7 @@ def 
test_OfflineRoutineRequest_canStoreValidWorkspaceDirectory(): assert(request.getWorkspaceDirectory() == "./") -def test_OfflineRoutineRequest_cloneDirectoryIsBasedOnWorkspaceDirectory(): +def test_OfflineRoutineRequest_cloneDirectoryIsBasedOnWorkspaceDirectory() -> None: request = requests.OfflineRoutineRequest( repositoryURL="https://github.com/scikit/scikit-data", outputDirectory="./outputs/", @@ -261,11 +265,11 @@ def test_OfflineRoutineRequest_cloneDirectoryIsBasedOnWorkspaceDirectory(): assert(request.getCloneDirectory() == Path("./workspace/scikit_scikit-data")) -def test_OfflineRoutineRequest_badWorkspaceDirectoryMeansError(): +def test_OfflineRoutineRequest_badWorkspaceDirectoryMeansError() -> None: requestA = requests.OfflineRoutineRequest( repositoryURL="https://github.com/owner/repo", outputDirectory="./", - workspaceDirectory=None) + workspaceDirectory=None) # type: ignore assert(requestA.hasErrors()) requestB = requests.OfflineRoutineRequest( diff --git a/tests/test_response.py b/tests/test_response.py index 000fd1d..7be5115 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -2,7 +2,7 @@ import reposcanner.response as responseapi -def test_ResponseModel_CanConstructSuccessfulResponseByFactory(): +def test_ResponseModel_CanConstructSuccessfulResponseByFactory() -> None: message = None attachments = None responseFactory = responseapi.ResponseFactory() @@ -11,7 +11,7 @@ def test_ResponseModel_CanConstructSuccessfulResponseByFactory(): assert (successfulResponse.wasSuccessful() is True) -def test_ResponseModel_CanConstructFailureResponseByFactory(): +def test_ResponseModel_CanConstructFailureResponseByFactory() -> None: message = None attachments = None responseFactory = responseapi.ResponseFactory() @@ -20,22 +20,22 @@ def test_ResponseModel_CanConstructFailureResponseByFactory(): assert (successfulResponse.wasSuccessful() is False) -def test_ResponseModel_HasNoMessageByDefault(): - status = None +def 
test_ResponseModel_HasNoMessageByDefault() -> None: + status = responseapi.ResponseStatus.SUCCESS response = responseapi.ResponseModel(status) assert (response.hasMessage() is False) assert (response.getMessage() is None) -def test_ResponseModel_HasNoAttachmentsByDefault(): - status = None +def test_ResponseModel_HasNoAttachmentsByDefault() -> None: + status = responseapi.ResponseStatus.SUCCESS response = responseapi.ResponseModel(status) assert (response.hasAttachments() is False) assert (len(response.getAttachments()) == 0) -def test_ResponseModel_CanStoreAttachments(): - status = None +def test_ResponseModel_CanStoreAttachments() -> None: + status = responseapi.ResponseStatus.SUCCESS attachments = ["dataA", "dataB"] response = responseapi.ResponseModel(status, message=None, @@ -44,8 +44,8 @@ def test_ResponseModel_CanStoreAttachments(): assert (len(response.getAttachments()) == 2) -def test_ResponseModel_CanStoreMessage(): - status = None +def test_ResponseModel_CanStoreMessage() -> None: + status = responseapi.ResponseStatus.SUCCESS message = "details listed here" response = responseapi.ResponseModel(status, message=message, diff --git a/tests/test_thirdPartyLibraries.py b/tests/test_thirdPartyLibraries.py index 2ce2f31..31fe7d6 100644 --- a/tests/test_thirdPartyLibraries.py +++ b/tests/test_thirdPartyLibraries.py @@ -3,7 +3,7 @@ import os -def test_GraphViz_isAvailableOnCommandLine(): +def test_GraphViz_isAvailableOnCommandLine() -> None: """ This tests whether we can get a version for Dot. 
The version info should look something like this: From fd80fa37611d6e0f92aababd7ed0bfb1f3e57aec Mon Sep 17 00:00:00 2001 From: Samuel Grayson Date: Thu, 15 Jun 2023 16:43:50 -0500 Subject: [PATCH 12/12] Document how to use mypy --- README.md | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 0e8450f..0ff64d3 100644 --- a/README.md +++ b/README.md @@ -13,25 +13,10 @@ The diagram above illustrates the overall architecture of the Reposcanner toolki ## How to Install -First, clone the repository from GitHub: - ``` -git clone https://github.com/bssw-psip/reposcanner.git +pip install git+https://github.com/bssw-psip/reposcanner.git ``` -Then install Reposcanner and run the test suite: - -``` -cd reposcanner -python3 -m venv ../repo-env # create a new virtual environment -. ../repo-env/bin/activate # activate the new virtual env -pip install -e . # create editable install -tox # run tests -``` - -If all tests pass, the installation was successful, and you are ready to go! - - # How to Run ## Setup input files @@ -87,3 +72,41 @@ analysis: arg0: "foo" arg1: [1, 2, 3] ``` + +# Contributing + +## How to install in development mode + +``` +git clone https://github.com/bssw-psip/reposcanner.git +cd reposcanner +python3 -m venv ../repo-env # create a new virtual environment +. ../repo-env/bin/activate # activate the new virtual env +pip install -e . # create editable install +``` + +Note you will need to run `. ../repo-env/bin/activate` every time you start a +new shell to get this environment back. + +You can use a type-checker like [mypy] to catch errors before runtime. Mypy can +catch variable name errors, type errors, function arity mismatches, and many +others. + +[mypy]: https://www.mypy-lang.org/ + +To run mypy, + +``` +export MYPYPATH=${PWD}/src:$MYPYPATH +mypy --strict tests src +``` + +One can also use tests to build confidence in the correctness of the code. 
+ +``` +export PYTHONPATH=${PWD}/src:$PYTHONPATH +pytest +``` + +You can pass `--exitfirst` to stop after the first failing test and +`--failed-first` to run the tests that failed in the previous run before the rest.