From d26d6b12a90d119392d8a692572fbf2589f0e023 Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 11 Feb 2021 13:17:16 -0500 Subject: [PATCH 01/57] Add fork:false to Github queries --- follow_repos_by_search_term.py | 2 +- follow_top_repos_by_star_count.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index f1f7688..797dd5b 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -22,7 +22,7 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str): site = LGTMSite.create_from_file() for date_range in utils.github_dates.generate_dates(): - repos = github.search_repositories(query=f'language:{language} created:{date_range} {search_term}') + repos = github.search_repositories(query=f'language:{language} fork:false created:{date_range} {search_term}') for repo in repos: # Github has rate limiting in place hence why we add a sleep here. More info can be found here: diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 7c7cca3..2d44221 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -22,7 +22,7 @@ def find_and_save_projects_to_lgtm(language: str): site = LGTMSite.create_from_file() for date_range in utils.github_dates.generate_dates(): - repos = github.search_repositories(query=f'stars:>500 created:{date_range} sort:stars language:{language}') + repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') for repo in repos: # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: From 2e58640287f9a93cf8e7a0a4fe51f5168840bab0 Mon Sep 17 00:00:00 2001 From: Thank You Date: Tue, 16 Feb 2021 20:37:43 -0500 Subject: [PATCH 02/57] Initial work setting up custom LGTM project list curation --- follow_top_repos_by_star_count.py | 25 ++++++++++ lgtm.py | 5 ++ move_repos_to_lgtm_lists.py | 78 +++++++++++++++++++++++++++++++ test.py | 10 ++++ 4 files changed, 118 insertions(+) create mode 100644 move_repos_to_lgtm_lists.py create mode 100644 test.py diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 2d44221..431fc01 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -1,3 +1,28 @@ +# ## How the script currently works: +# - We first get all the github repos. +# - We then take each repo and follow the repository in lgtm +# +# +# ## changes that need to be made +# - Since we are adding lists to lgtm, we also need to store someplace every +# repo that we added to lgtm. +# - once teh script is done the list of lgtm saved projects will be stored in a txt file +# - after a period of time, say 24 hrs, we then run a companion script that moves +# lgtm followed projects into their own lists. this script will take the text file name +# and use that to create a list. it will then move the lgtm projects into that list and +# unfollow them from the lgtm list. this script can be used universally. +# +# - explicit changes: +# - current scripts: +# - each script must now accept a list arg that represents the list name that you want +# your repos to be saved to. 
+# - each script must now add the lgtm project id to a file that stores repos (txt file) +# - new script: +# - we need a script that will take a text file, loop through the text file, +# and for each item in the text file add the item to the lgtm list (the list name +# is derived from the name of the ext file) +# + from typing import List from lgtm import LGTMSite diff --git a/lgtm.py b/lgtm.py index 8750c40..4f86e16 100644 --- a/lgtm.py +++ b/lgtm.py @@ -35,6 +35,7 @@ def _make_lgtm_get(self, url: str) -> dict: ) return r.json() + # Retrieves a user's projects def get_my_projects(self) -> List[dict]: url = 'https://lgtm.com/internal_api/v0.2/getMyProjects?apiVersion=' + self.api_version data = self._make_lgtm_get(url) @@ -43,10 +44,12 @@ def get_my_projects(self) -> List[dict]: else: raise LGTMRequestException('LGTM GET request failed with response: %s' % str(data)) + # Given an org name, retrieve a user's projects under that org def get_my_projects_under_org(self, org: str) -> List['SimpleProject']: projects_sorted = LGTMDataFilters.org_to_ids(self.get_my_projects()) return LGTMDataFilters.extract_project_under_org(org, projects_sorted) + # This method handles making a POST request to the LGTM server def _make_lgtm_post(self, url: str, data: dict) -> dict: api_data = { 'apiVersion': self.api_version @@ -74,6 +77,7 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: else: raise LGTMRequestException('LGTM POST request failed with response: %s' % str(data_returned)) + # Given a project list id and a list of project ids, add the projects to the project list. 
def load_into_project_list(self, into_project: int, lgtm_project_ids: List[str]): url = "https://lgtm.com/internal_api/v0.2/updateProjectSelection" # Because LGTM uses some wacky format for it's application/x-www-form-urlencoded data @@ -112,6 +116,7 @@ def follow_repository(self, repository_url: str): } self._make_lgtm_post(url, data) + # Given a project id, unfollow the project def unfollow_repository_by_id(self, project_id: str): url = "https://lgtm.com/internal_api/v0.2/unfollowProject" data = { diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py new file mode 100644 index 0000000..6955418 --- /dev/null +++ b/move_repos_to_lgtm_lists.py @@ -0,0 +1,78 @@ +# - new script: +# - we need a script that will take a text file, loop through the text file, +# and for each item in the text file add the item to the lgtm list (the list name +# is derived from the name of the ext file) +# + +from typing import List +from lgtm import LGTMSite + +import sys +import time + +lgtm_site = LGTMSite.create_from_file() + +file_name = "test.txt" +project_list_name = file_name.split(".")[0] + +# We want to find or create a project list based on the the name of +# the text file that holds all of the projects we are currently following. +project_list_id = lgtm_site.get_or_create_project_list(project_list_name) +file = open(file_name, "r") + + +project_ids = file.read() +# With the project list id and the project ids, we now want to save the repos +# we currently follow to the project list +site.load_into_project_list(project_list_id, project_ids) + +for project_id in project_ids: + print(project_id) + # The last thing we need to do is tidy up and unfollow all the repositories + # we just added to our project list. 
+ site.unfollow_repository_by_id(project_id) + + +# lgtm methods we need to use +# get_or_create_project_list +# unfollow_repository_by_id +# load_into_project_list + + +# +# def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): +# print("Adding: " + repo_name) +# # Another throttle. Considering we are sending a request to Github +# # owned properties twice in a small time-frame, I would prefer for +# # this to be here. +# time.sleep(1) +# +# repo_url: str = 'https://github.com/' + repo_name +# site.follow_repository(repo_url) +# print("Saved the project: " + repo_name) +# +# def find_and_save_projects_to_lgtm(language: str): +# github = utils.github_api.create() +# site = LGTMSite.create_from_file() +# +# for date_range in utils.github_dates.generate_dates(): +# repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') +# +# for repo in repos: +# # Github has rate limiting in place hence why we add a sleep here. More info can be found here: +# # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting +# time.sleep(1) +# +# if repo.archived or repo.fork: +# continue +# +# save_project_to_lgtm(site, repo.full_name) +# +# if len(sys.argv) < 2: +# print("Please provide a language you want to search") +# exit +# +# language = sys.argv[1].capitalize() +# +# print('Following the top repos for %s' % language) +# find_and_save_projects_to_lgtm(language) diff --git a/test.py b/test.py new file mode 100644 index 0000000..14abdf4 --- /dev/null +++ b/test.py @@ -0,0 +1,10 @@ +from lgtm import LGTMSite +lgtm_site = LGTMSite.create_from_file() + +repo_url: str = 'https://github.com/google/jax' + +result = lgtm_site.follow_repository(repo_url) +print("---------------") +print("---------------") +print("---------------") +print(result) From d4098d38166742511209fe908ec0391398c5efa1 Mon Sep 17 00:00:00 2001 From: Thank You Date: Tue, 16 Feb 2021 21:58:58 -0500 Subject: [PATCH 03/57] Clean up 
code and get basic cache parsing file setup --- lgtm.py | 4 ++-- move_repos_to_lgtm_lists.py | 6 +++--- test.py | 22 ++++++++++++++++++---- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/lgtm.py b/lgtm.py index 4f86e16..905a56d 100644 --- a/lgtm.py +++ b/lgtm.py @@ -108,13 +108,13 @@ def force_rebuild_project(self, simple_project: 'SimpleProject'): except LGTMRequestException: print('Failed rebuilding project. This may be because it is already being built. `%s`' % simple_project) - def follow_repository(self, repository_url: str): + def follow_repository(self, repository_url: str) -> dict: url = "https://lgtm.com/internal_api/v0.2/followProject" data = { 'url': repository_url, 'apiVersion': self.api_version } - self._make_lgtm_post(url, data) + return self._make_lgtm_post(url, data) # Given a project id, unfollow the project def unfollow_repository_by_id(self, project_id: str): diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 6955418..4552bf0 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -10,17 +10,17 @@ import sys import time -lgtm_site = LGTMSite.create_from_file() +site = LGTMSite.create_from_file() file_name = "test.txt" project_list_name = file_name.split(".")[0] # We want to find or create a project list based on the the name of # the text file that holds all of the projects we are currently following. 
-project_list_id = lgtm_site.get_or_create_project_list(project_list_name) +project_list_data = site.get_or_create_project_list(project_list_name) +project_list_id = project_list_data['realProject'][0]['key'] file = open(file_name, "r") - project_ids = file.read() # With the project list id and the project ids, we now want to save the repos # we currently follow to the project list diff --git a/test.py b/test.py index 14abdf4..3d953a3 100644 --- a/test.py +++ b/test.py @@ -4,7 +4,21 @@ repo_url: str = 'https://github.com/google/jax' result = lgtm_site.follow_repository(repo_url) -print("---------------") -print("---------------") -print("---------------") -print(result) +print("1111111111") +print("1111111111") +print("1111111111") +print("1111111111") +print("1111111111") +project_id = result['realProject'][0]['key'] +print(project_id) + +print("1111111111") +print("1111111111") +print("1111111111") +print("1111111111") +project_list_id = lgtm_site.get_or_create_project_list("test_project_16") +print(project_list_id) + +lgtm_site.load_into_project_list(project_list_id, [project_id]) + +lgtm_site.unfollow_repository_by_id(project_id) From e69003a9f091831ed9e837300934aa62ba1402f8 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 15:10:53 -0500 Subject: [PATCH 04/57] Continued work on custom project lists feature --- README.md | 31 +++++++++++- cache/test.txt | 0 follow_top_repos_by_star_count.py | 23 +++++++-- move_repos_to_lgtm_lists.py | 80 ++++++++----------------------- test.py | 52 ++++++++++++-------- utils/cacher.py | 7 +++ 6 files changed, 108 insertions(+), 85 deletions(-) create mode 100644 cache/test.txt create mode 100644 utils/cacher.py diff --git a/README.md b/README.md index 0e5a874..3f8f515 100644 --- a/README.md +++ b/README.md @@ -66,9 +66,38 @@ python3 follow_repos_by_search_term_via_code_instances.py # Finds top repositories that have a minimum 500 stars and use the provided programming language. 
-python3 follow_top_repos_by_star_count.py +python3 follow_top_repos_by_star_count.py (optional) ``` +## The Custom Projects Lists Feature +In developing these collection of scripts, we realized that when a user follows thousands of repos in their LGTM account, there is a chance that the LGTM account will break. You won't be able to use the query console and some API +calls will be broken. + +To resolve this, we decided to create a feature users can opt-in. This feature called "Custom Projects Lists" does the +following: + +- Follows all repos (aka project) in your LGTM account. +- Stores every project you follow in a txt file. +- At a later date (we suggest 24 hours), the user may run a follow-up command that will take the repos followed, add them to a LGTM custom list, and finally unfollow the projects in the user's LGTM account. + +Although these steps are tedious, this is the best work-around we've found. We avoid bricking the LGTM account when projects are placed in custom lists. Also, we typically wait 24 hours since if the project is new to LGTM it will want to first process the project and projects being processed can't be added to custom lists. + +Finally, by having custom lists we hope that the security researcher will have an easier time picking which repos they want to test. + +### How To Run The Custom Projects Lists Feature +In some of the commands above, you will see the option. This is optional for all +commands. This CUSTOM_LIST_NAME represents the name of a LGTM project list that will be created and used to add projects to. Any projects found from that command will then be added to the LGTM custom list. Let's show an example below to get a better idea of how this works: + +1. Run a command passing in the name of the custom list name. The command below will follow Javascript repos and generate a cache file of every repo you follow for the project list called "cool_javascript_projects". 
+ + `python3 follow_top_repos_by_star_count.py javascript cool_javascript_projects` + +2. Wait 1 - 24 hours. + +3. Run the command below. This will take a cached file you created earlier, create a LGTM custom project list, add the projects to that project list, and finally unfollow the repositories in your LGTM account. + + `python3 move_repos_to_lgtm_lists.py` + ## Legal The author of this script assumes no liability for your use of this project, including, diff --git a/cache/test.txt b/cache/test.txt new file mode 100644 index 0000000..e69de29 diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 431fc01..44a774c 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -28,10 +28,11 @@ import utils.github_dates import utils.github_api +import utils.cacher # utils.cacher.write_project_ids_to_file import sys import time -def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): +def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict: print("Adding: " + repo_name) # Another throttle. 
Considering we are sending a request to Github # owned properties twice in a small time-frame, I would prefer for @@ -39,12 +40,15 @@ def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): time.sleep(1) repo_url: str = 'https://github.com/' + repo_name - site.follow_repository(repo_url) + project = site.follow_repository(repo_url) + print("Saved the project: " + repo_name) + return project -def find_and_save_projects_to_lgtm(language: str): +def find_and_save_projects_to_lgtm(language: str, custom_list_name: str) -> List[str]: github = utils.github_api.create() site = LGTMSite.create_from_file() + saved_project_ids: List[str] = [] for date_range in utils.github_dates.generate_dates(): repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') @@ -57,7 +61,11 @@ def find_and_save_projects_to_lgtm(language: str): if repo.archived or repo.fork: continue - save_project_to_lgtm(site, repo.full_name) + saved_project = save_project_to_lgtm(site, repo.full_name) + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_ids.append(saved_project) + + return saved_project_ids if len(sys.argv) < 2: print("Please provide a language you want to search") @@ -66,4 +74,9 @@ def find_and_save_projects_to_lgtm(language: str): language = sys.argv[1].capitalize() print('Following the top repos for %s' % language) -find_and_save_projects_to_lgtm(language) +saved_project_ids = find_and_save_projects_to_lgtm(language) + +# If the user provided a second arg then they want to create a custom list. 
+if len(sys.argv) < 3: + custom_list_name = sys.argv[2] + utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 4552bf0..ee988fa 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -9,70 +9,32 @@ import sys import time +import os +cached_files = os.listdir("cache") site = LGTMSite.create_from_file() -file_name = "test.txt" -project_list_name = file_name.split(".")[0] +for cached_file in cached_files: + # This is dirty. Is there an easier way to do this? + cached_file = "cache/" + cached_file -# We want to find or create a project list based on the the name of -# the text file that holds all of the projects we are currently following. -project_list_data = site.get_or_create_project_list(project_list_name) -project_list_id = project_list_data['realProject'][0]['key'] -file = open(file_name, "r") + project_list_name = cached_file.split(".")[0] -project_ids = file.read() -# With the project list id and the project ids, we now want to save the repos -# we currently follow to the project list -site.load_into_project_list(project_list_id, project_ids) + # We want to find or create a project list based on the the name of + # the text file that holds all of the projects we are currently following. + project_list_data = site.get_or_create_project_list(project_list_name) + project_list_id = project_list_data['realProject'][0]['key'] + file = open("cache/" + cached_file, "r") -for project_id in project_ids: - print(project_id) - # The last thing we need to do is tidy up and unfollow all the repositories - # we just added to our project list. 
- site.unfollow_repository_by_id(project_id) + project_ids = file.read() + # With the project list id and the project ids, we now want to save the repos + # we currently follow to the project list + site.load_into_project_list(project_list_id, project_ids) + for project_id in project_ids: + print(project_id) + # The last thing we need to do is tidy up and unfollow all the repositories + # we just added to our project list. + site.unfollow_repository_by_id(project_id) -# lgtm methods we need to use -# get_or_create_project_list -# unfollow_repository_by_id -# load_into_project_list - - -# -# def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): -# print("Adding: " + repo_name) -# # Another throttle. Considering we are sending a request to Github -# # owned properties twice in a small time-frame, I would prefer for -# # this to be here. -# time.sleep(1) -# -# repo_url: str = 'https://github.com/' + repo_name -# site.follow_repository(repo_url) -# print("Saved the project: " + repo_name) -# -# def find_and_save_projects_to_lgtm(language: str): -# github = utils.github_api.create() -# site = LGTMSite.create_from_file() -# -# for date_range in utils.github_dates.generate_dates(): -# repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') -# -# for repo in repos: -# # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: -# # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting -# time.sleep(1) -# -# if repo.archived or repo.fork: -# continue -# -# save_project_to_lgtm(site, repo.full_name) -# -# if len(sys.argv) < 2: -# print("Please provide a language you want to search") -# exit -# -# language = sys.argv[1].capitalize() -# -# print('Following the top repos for %s' % language) -# find_and_save_projects_to_lgtm(language) + os.remove(cached_file) diff --git a/test.py b/test.py index 3d953a3..02bb4e0 100644 --- a/test.py +++ b/test.py @@ -1,24 +1,36 @@ -from lgtm import LGTMSite -lgtm_site = LGTMSite.create_from_file() +from typing import List +import sys +import os -repo_url: str = 'https://github.com/google/jax' +print(os.listdir("cache")) -result = lgtm_site.follow_repository(repo_url) -print("1111111111") -print("1111111111") -print("1111111111") -print("1111111111") -print("1111111111") -project_id = result['realProject'][0]['key'] -print(project_id) -print("1111111111") -print("1111111111") -print("1111111111") -print("1111111111") -project_list_id = lgtm_site.get_or_create_project_list("test_project_16") -print(project_list_id) +# +# projects: List[str] = [] +# +# print(sys.argv) -lgtm_site.load_into_project_list(project_list_id, [project_id]) - -lgtm_site.unfollow_repository_by_id(project_id) +# from lgtm import LGTMSite +# lgtm_site = LGTMSite.create_from_file() +# +# repo_url: str = 'https://github.com/google/jax' +# +# result = lgtm_site.follow_repository(repo_url) +# print("1111111111") +# print("1111111111") +# print("1111111111") +# print("1111111111") +# print("1111111111") +# project_id = result['realProject'][0]['key'] +# print(project_id) +# +# print("1111111111") +# print("1111111111") +# print("1111111111") +# print("1111111111") +# project_list_id = lgtm_site.get_or_create_project_list("test_project_16") +# print(project_list_id) +# +# lgtm_site.load_into_project_list(project_list_id, [project_id]) +# +# 
lgtm_site.unfollow_repository_by_id(project_id) diff --git a/utils/cacher.py b/utils/cacher.py new file mode 100644 index 0000000..3c1b6e9 --- /dev/null +++ b/utils/cacher.py @@ -0,0 +1,7 @@ +from typing import List + +def write_project_ids_to_file(project_ids: List[str], file_name: str): + file = open("cache/" + file_name + ".txt", "a") + for project_id in project_ids: + file.write(project_id + "\n") + file.close() From 02f16a3fc74391bcfab91cff90cf5dc237d53942 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:03:51 -0500 Subject: [PATCH 05/57] Fix misc issues --- README.md | 4 +++- cache/test.txt | 0 follow_top_repos_by_star_count.py | 13 ++++++++----- move_repos_to_lgtm_lists.py | 26 ++++++++++++-------------- test.py | 2 +- 5 files changed, 24 insertions(+), 21 deletions(-) delete mode 100644 cache/test.txt diff --git a/README.md b/README.md index 3f8f515..f342aaa 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ commands. This CUSTOM_LIST_NAME represents the name of a LGTM project list that 1. Run a command passing in the name of the custom list name. The command below will follow Javascript repos and generate a cache file of every repo you follow for the project list called "cool_javascript_projects". - `python3 follow_top_repos_by_star_count.py javascript cool_javascript_projects` + `python3 follow_top_repos_by_star_count.py javascript big_ole_js_projects` 2. Wait 1 - 24 hours. @@ -98,6 +98,8 @@ commands. This CUSTOM_LIST_NAME represents the name of a LGTM project list that `python3 move_repos_to_lgtm_lists.py` +Note: When naming a project custom list name, please use alphanumeric, dashes, and underscore characters only. 
+ ## Legal The author of this script assumes no liability for your use of this project, including, diff --git a/cache/test.txt b/cache/test.txt deleted file mode 100644 index e69de29..0000000 diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 44a774c..0b6503b 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -45,13 +45,14 @@ def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict: print("Saved the project: " + repo_name) return project -def find_and_save_projects_to_lgtm(language: str, custom_list_name: str) -> List[str]: +def find_and_save_projects_to_lgtm(language: str) -> List[str]: github = utils.github_api.create() site = LGTMSite.create_from_file() saved_project_ids: List[str] = [] for date_range in utils.github_dates.generate_dates(): - repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') + # this was originally 500 stars. changing this so that it's easy for testing + repos = github.search_repositories(query=f'stars:>100500 created:{date_range} fork:false sort:stars language:{language}') for repo in repos: # Github has rate limiting in place hence why we add a sleep here. More info can be found here: @@ -63,7 +64,7 @@ def find_and_save_projects_to_lgtm(language: str, custom_list_name: str) -> List saved_project = save_project_to_lgtm(site, repo.full_name) saved_project_id = saved_project['realProject'][0]['key'] - saved_project_ids.append(saved_project) + saved_project_ids.append(saved_project_id) return saved_project_ids @@ -75,8 +76,10 @@ def find_and_save_projects_to_lgtm(language: str, custom_list_name: str) -> List print('Following the top repos for %s' % language) saved_project_ids = find_and_save_projects_to_lgtm(language) - +print("saved proejct ids") +print(saved_project_ids) # If the user provided a second arg then they want to create a custom list. 
-if len(sys.argv) < 3: +if len(sys.argv) <= 3: + # print custom_list_name = sys.argv[2] utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index ee988fa..c5e33e9 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -1,9 +1,3 @@ -# - new script: -# - we need a script that will take a text file, loop through the text file, -# and for each item in the text file add the item to the lgtm list (the list name -# is derived from the name of the ext file) -# - from typing import List from lgtm import LGTMSite @@ -14,25 +8,29 @@ cached_files = os.listdir("cache") site = LGTMSite.create_from_file() -for cached_file in cached_files: +for cached_file_name in cached_files: # This is dirty. Is there an easier way to do this? - cached_file = "cache/" + cached_file + cached_file = "cache/" + cached_file_name - project_list_name = cached_file.split(".")[0] + project_list_name = cached_file_name.split(".")[0] # We want to find or create a project list based on the the name of # the text file that holds all of the projects we are currently following. - project_list_data = site.get_or_create_project_list(project_list_name) - project_list_id = project_list_data['realProject'][0]['key'] - file = open("cache/" + cached_file, "r") + project_list_id = site.get_or_create_project_list(project_list_name) + + file = open(cached_file, "r") - project_ids = file.read() + project_ids = file.read().split("\n") + + # remove any "" in the array + while("" in project_ids): + project_ids.remove("") # With the project list id and the project ids, we now want to save the repos # we currently follow to the project list + site.load_into_project_list(project_list_id, project_ids) for project_id in project_ids: - print(project_id) # The last thing we need to do is tidy up and unfollow all the repositories # we just added to our project list. 
site.unfollow_repository_by_id(project_id) diff --git a/test.py b/test.py index 02bb4e0..90e39df 100644 --- a/test.py +++ b/test.py @@ -2,7 +2,7 @@ import sys import os -print(os.listdir("cache")) +print(len(sys.argv)) # From d3898cbb7454d6cdbaa0278ff3911bddf16f801e Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:06:13 -0500 Subject: [PATCH 06/57] Add comment and ignore cache files --- .gitignore | 2 ++ move_repos_to_lgtm_lists.py | 1 + 2 files changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 95cd877..2bfacce 100644 --- a/.gitignore +++ b/.gitignore @@ -376,4 +376,6 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk +cache/* + # End of https://www.toptal.com/developers/gitignore/api/java,pycharm+all,intellij+all,python,macos,windows,linux diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index c5e33e9..3f0f392 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -35,4 +35,5 @@ # we just added to our project list. site.unfollow_repository_by_id(project_id) + # Since we are done with the file, we can now delete it from the cache. os.remove(cached_file) From 8e839a8fa9d515a23c1f7807988a8dcd1264144a Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:21:00 -0500 Subject: [PATCH 07/57] Refactor code --- move_repos_to_lgtm_lists.py | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 3f0f392..46d4bbf 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -5,19 +5,14 @@ import time import os -cached_files = os.listdir("cache") -site = LGTMSite.create_from_file() - -for cached_file_name in cached_files: - # This is dirty. Is there an easier way to do this? 
- cached_file = "cache/" + cached_file_name - +def get_project_list_id(cached_file_name: str) -> str: project_list_name = cached_file_name.split(".")[0] # We want to find or create a project list based on the the name of # the text file that holds all of the projects we are currently following. - project_list_id = site.get_or_create_project_list(project_list_name) + return site.get_or_create_project_list(project_list_name) +def get_project_ids(cached_file: str) -> List[str]: file = open(cached_file, "r") project_ids = file.read().split("\n") @@ -25,15 +20,33 @@ # remove any "" in the array while("" in project_ids): project_ids.remove("") - # With the project list id and the project ids, we now want to save the repos - # we currently follow to the project list - site.load_into_project_list(project_list_id, project_ids) + return project_ids +def cleanup(file_name: str): + # Since we are done with the file, we can now delete it from the cache. + os.remove(file_name) + +def unfollow_projects(project_ids: List[str]): for project_id in project_ids: # The last thing we need to do is tidy up and unfollow all the repositories # we just added to our project list. site.unfollow_repository_by_id(project_id) - # Since we are done with the file, we can now delete it from the cache. 
- os.remove(cached_file) +def process_cached_file(cached_file_name): + cached_file = "cache/" + cached_file_name + project_list_id = get_project_list_id(cached_file_name) + project_ids = get_project_ids(cached_file) + + # With the project list id and the project ids, we now want to save the repos + # we currently follow to the project list + site.load_into_project_list(project_list_id, project_ids) + + unfollow_projects(project_ids) + cleanup(cached_file) + +site = LGTMSite.create_from_file() +cached_file_names = os.listdir("cache") + +for cached_file_name in cached_file_names: + process_cached_file(cached_file_name) From 5c996fc2f8310ef922e5a8a8ed6540a94daa2055 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:21:11 -0500 Subject: [PATCH 08/57] Reword text --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index f342aaa..9fc71ca 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,7 @@ python3 follow_top_repos_by_star_count.py (optional In developing these collection of scripts, we realized that when a user follows thousands of repos in their LGTM account, there is a chance that the LGTM account will break. You won't be able to use the query console and some API calls will be broken. -To resolve this, we decided to create a feature users can opt-in. This feature called "Custom Projects Lists" does the -following: +To resolve this, we decided to create a feature users can opt-in. The "Custom Projects Lists" feature does the following: - Follows all repos (aka project) in your LGTM account. - Stores every project you follow in a txt file. 
From 7bd1bf05f3afd4e82de37735b5b0b69e9856d1dd Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:41:23 -0500 Subject: [PATCH 09/57] Revert stars to accurate count --- follow_top_repos_by_star_count.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 0b6503b..60b4d75 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -51,8 +51,7 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: saved_project_ids: List[str] = [] for date_range in utils.github_dates.generate_dates(): - # this was originally 500 stars. changing this so that it's easy for testing - repos = github.search_repositories(query=f'stars:>100500 created:{date_range} fork:false sort:stars language:{language}') + repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') for repo in repos: # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: From 3f8d3367d5a5232abf3eb8b9c57b989651e0544a Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:41:48 -0500 Subject: [PATCH 10/57] Remove comment --- follow_top_repos_by_star_count.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 60b4d75..ccf1be5 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -28,7 +28,7 @@ import utils.github_dates import utils.github_api -import utils.cacher # utils.cacher.write_project_ids_to_file +import utils.cacher import sys import time From ca2bbc6151e1847994089600f2ce0a777abf5e76 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:54:01 -0500 Subject: [PATCH 11/57] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9fc71ca..b41bd09 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ python3 move_org_projects_under_project_list_then_unfollow.py # Finds repositories given a search term. Under the hood, the script searches Github for repositories that match the provided search term. -python3 follow_repos_by_search_term.py +python3 follow_repos_by_search_term.py (optional) # Finds top repositories that have a minimum 500 stars and use the provided programming language. 
python3 follow_top_repos_by_star_count.py (optional) From 69543fb298a1d812fd9b5b93111988bcb8a076c6 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 16:54:23 -0500 Subject: [PATCH 12/57] Add custom project list feature to search term script --- follow_repos_by_search_term.py | 34 ++++++++++++++++++++++++------- follow_top_repos_by_star_count.py | 5 ++--- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index 797dd5b..72a1c90 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -1,12 +1,14 @@ from typing import List from lgtm import LGTMSite + +import utils.cacher import utils.github_dates import utils.github_api import sys import time -def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): +def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict: print("About to save: " + repo_name) # Another throttle. Considering we are sending a request to Github # owned properties twice in a small time-frame, I would prefer for @@ -14,16 +16,17 @@ def save_project_to_lgtm(site: 'LGTMSite', repo_name: str): time.sleep(1) repo_url: str = 'https://github.com/' + repo_name - site.follow_repository(repo_url) + project = site.follow_repository(repo_url) print("Saved the project: " + repo_name) + return project -def find_and_save_projects_to_lgtm(language: str, search_term: str): +def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str]: github = utils.github_api.create() site = LGTMSite.create_from_file() - + saved_project_ids: List[str] = [] for date_range in utils.github_dates.generate_dates(): repos = github.search_repositories(query=f'language:{language} fork:false created:{date_range} {search_term}') - + print(f'language:{language} fork:false created:{date_range} {search_term}') for repo in repos: # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting @@ -32,7 +35,18 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str): if repo.archived or repo.fork: continue - save_project_to_lgtm(site, repo.full_name) + saved_project = save_project_to_lgtm(site, repo.full_name) + print('--------------------') + print('--------------------') + print('--------------------') + print(saved_project) + print('--------------------') + print('--------------------') + print('--------------------') + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_ids.append(saved_project_id) + + return saved_project_ids if len(sys.argv) < 3: print("Please make sure you provided a language and search term") @@ -42,4 +56,10 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str): search_term = sys.argv[2] print(f'Following repos for the {language} language that contain the \'{search_term}\' search term.') -find_and_save_projects_to_lgtm(language, search_term) +saved_project_ids = find_and_save_projects_to_lgtm(language, search_term) + +# If the user provided a second arg then they want to create a custom list. 
+if len(sys.argv) <= 4: + # print + custom_list_name = sys.argv[3] + utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index ccf1be5..d9dd42c 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -28,7 +28,7 @@ import utils.github_dates import utils.github_api -import utils.cacher +import utils.cacher import sys import time @@ -75,8 +75,7 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: print('Following the top repos for %s' % language) saved_project_ids = find_and_save_projects_to_lgtm(language) -print("saved proejct ids") -print(saved_project_ids) + # If the user provided a second arg then they want to create a custom list. if len(sys.argv) <= 3: # print From a687d356e6550f0882b6620594f6efb51e61343e Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 18:00:33 -0500 Subject: [PATCH 13/57] Save only real projects to LGTM project lists --- follow_repos_by_search_term.py | 17 +++++++---------- follow_top_repos_by_star_count.py | 7 +++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index 72a1c90..1d62013 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -24,9 +24,10 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] github = utils.github_api.create() site = LGTMSite.create_from_file() saved_project_ids: List[str] = [] + for date_range in utils.github_dates.generate_dates(): repos = github.search_repositories(query=f'language:{language} fork:false created:{date_range} {search_term}') - print(f'language:{language} fork:false created:{date_range} {search_term}') + for repo in repos: # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting @@ -36,15 +37,11 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] continue saved_project = save_project_to_lgtm(site, repo.full_name) - print('--------------------') - print('--------------------') - print('--------------------') - print(saved_project) - print('--------------------') - print('--------------------') - print('--------------------') - saved_project_id = saved_project['realProject'][0]['key'] - saved_project_ids.append(saved_project_id) + + # Proto projects can't be saved to a project list, so instead we only grab real projects. + if "realProject" in saved_project: + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_ids.append(saved_project_id) return saved_project_ids diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index d9dd42c..f06b62d 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -62,8 +62,11 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: continue saved_project = save_project_to_lgtm(site, repo.full_name) - saved_project_id = saved_project['realProject'][0]['key'] - saved_project_ids.append(saved_project_id) + + # Proto projects can't be saved to a project list, so instead we only grab real projects. 
+ if "realProject" in saved_project: + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_ids.append(saved_project_id) return saved_project_ids From a4133cc944cce88e5ffaf329fc013e2d4aaeaf6c Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 18:01:27 -0500 Subject: [PATCH 14/57] Remove unnecessary modules --- move_repos_to_lgtm_lists.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 46d4bbf..d53ec14 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -1,8 +1,6 @@ from typing import List from lgtm import LGTMSite -import sys -import time import os def get_project_list_id(cached_file_name: str) -> str: From b5ecc8a528cfd39d885775db90f22dc6419ff5ac Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 22:31:35 -0500 Subject: [PATCH 15/57] Create cache folder if it already doesn't exist --- utils/cacher.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/cacher.py b/utils/cacher.py index 3c1b6e9..392bad8 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,7 +1,16 @@ from typing import List +import os + +def create_cache_folder(): + if not os.path.exists('cache'): + os.makedirs('cache') def write_project_ids_to_file(project_ids: List[str], file_name: str): + create_cache_folder() + file = open("cache/" + file_name + ".txt", "a") + for project_id in project_ids: file.write(project_id + "\n") + file.close() From c770a2f06679143f5f99903fcf636f326faad497 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 22:32:52 -0500 Subject: [PATCH 16/57] Add draft for build in progress guard clause --- move_repos_to_lgtm_lists.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index d53ec14..73f35ed 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -44,6 +44,21 @@ def process_cached_file(cached_file_name): 
cleanup(cached_file) site = LGTMSite.create_from_file() + +projects = site.get_my_projects() + +# build_still_in_progess = False +# +# for project in projects: +# if project.get('protoproject') is not None and project.get('protoproject')['displayName']: +# if project['protoproject']['state'] == "build_attempt_in_progress": +# build_still_in_progess = True +# break +# +# if build_still_in_progess: +# print("There are projects still being processed by LGTM. It's not safe ") +# return + cached_file_names = os.listdir("cache") for cached_file_name in cached_file_names: From b83bacf2ec40e0a3691ac045e7a6b478c7659629 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 17 Feb 2021 22:33:04 -0500 Subject: [PATCH 17/57] Accept both proto and real projects --- follow_repos_by_search_term.py | 7 +++++-- follow_top_repos_by_star_count.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index 1d62013..d36479c 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -38,9 +38,12 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] saved_project = save_project_to_lgtm(site, repo.full_name) - # Proto projects can't be saved to a project list, so instead we only grab real projects. 
if "realProject" in saved_project: - saved_project_id = saved_project['realProject'][0]['key'] + saved_project_id = saved_project['realProject'][0]['displayName'] + saved_project_ids.append(saved_project_id) + + if "protoproject" in saved_project: + saved_project_id = saved_project['protoproject']['displayName'] saved_project_ids.append(saved_project_id) return saved_project_ids diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index f06b62d..44abe8d 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -63,9 +63,12 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: saved_project = save_project_to_lgtm(site, repo.full_name) - # Proto projects can't be saved to a project list, so instead we only grab real projects. if "realProject" in saved_project: - saved_project_id = saved_project['realProject'][0]['key'] + saved_project_id = saved_project['realProject'][0]['displayName'] + saved_project_ids.append(saved_project_id) + + if "protoproject" in saved_project: + saved_project_id = saved_project['protoproject']['displayName'] saved_project_ids.append(saved_project_id) return saved_project_ids From 1b2982a59b06be28cf498a56fcc42cb165b74fae Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 18 Feb 2021 20:27:49 -0500 Subject: [PATCH 18/57] Add ProjectBuild and ProjectBuilds classes --- utils/cacher.py | 108 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/utils/cacher.py b/utils/cacher.py index 392bad8..64f6c62 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,11 +1,95 @@ from typing import List import os +import time + +class ProjectBuild: + def __init__(self, project: dict): + self.project = project + self.name = project["name"] + self.id = project["id"] + self.type = project["type"] + + def realProject(self) -> bool: + return self.type == "realProject" + + def protoproject(self) -> bool: + return self.type == 
"protoproject" + + def build_successful(self, followed_projects: List[dict]) -> bool: + # I don't know the name of the build successful status. + return ( + not self.build_in_progress(followed_projects) and + not self.build_failed(followed_projects) + ) + + def build_in_progress(self, followed_projects: List[dict]) -> bool: + return ( + self.project_part_of_cache(followed_projects) and + self.project_state("build_attempt_in_progress", followed_projects) + ) + + def build_failed(self, followed_projects: List[dict]) -> bool: + return ( + self.project_part_of_cache(followed_projects) and + self.project_state("build_attempt_failed", followed_projects) + ) + + def project_state(self, state: str, followed_projects: List[dict]) -> bool: + in_state = False + + for project in followed_projects: + if project.get('protoproject') is not None and project.get('protoproject')['state'] == state: + in_state = True + break + + return in_state + + def project_part_of_cache(self, followed_projects: List[dict]) -> bool: + part_of_cache = False + for project in followed_projects: + if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name: + part_of_cache = True + break + + return part_of_cache + +class ProjectBuilds: + def __init__(self, projects: List[ProjectBuild]): + self.projects = projects + + def unfollow_projects(self, site: 'LGTMSite'): + for project in self.projects: + time.sleep(1) + if project.realProject(): + site.unfollow_repository_by_id(project.id) + else: + site.unfollow_proto_repository_by_id(project.id) + + def return_successful_project_builds(self, site: 'LGTMSite') -> List[str]: + filtered_project_ids: List[str] = [] + followed_projects = site.get_my_projects() + + for project in self.projects: + if project.build_successful(followed_projects): + filtered_project_ids.append(project.id) + + return filtered_project_ids + + def build_processes_in_progress(self, followed_projects: List[dict]) -> bool: + in_progress = False + + 
for project in self.projects: + if project.build_in_progress(followed_projects): + in_progress = True + break + + return in_progress def create_cache_folder(): if not os.path.exists('cache'): os.makedirs('cache') -def write_project_ids_to_file(project_ids: List[str], file_name: str): +def write_project_data_to_file(project_ids: List[str], file_name: str): create_cache_folder() file = open("cache/" + file_name + ".txt", "a") @@ -14,3 +98,25 @@ def write_project_ids_to_file(project_ids: List[str], file_name: str): file.write(project_id + "\n") file.close() + +def get_project_builds(cached_file: str) -> ProjectBuilds: + file = open(cached_file, "r") + + project_data = file.read().split("\n") + + while("" in project_data): + project_data.remove("") + + for i, project in enumerate(project_data): + project_data[i] = ProjectBuild({ + "name": project.split(",")[0], + "id": project.split(",")[1], + "type": project.split(",")[2], + }) + + file.close() + + return ProjectBuilds(project_data) + +def remove_file(file_name: str): + os.remove(file_name) From 0bdd4cc4cd298e3e132939b92e5ade2f32dac3ca Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 18 Feb 2021 20:28:13 -0500 Subject: [PATCH 19/57] Remove logs and add new request for proto projects --- lgtm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lgtm.py b/lgtm.py index 905a56d..0927e6b 100644 --- a/lgtm.py +++ b/lgtm.py @@ -55,7 +55,7 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: 'apiVersion': self.api_version } full_data = {**api_data, **data} - print(data) + # print(data) r = requests.post( url, full_data, @@ -68,7 +68,7 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: response_text = r.text raise LGTMRequestException(f'Failed to parse JSON. 
Response was: {response_text}') from e - print(data_returned) + # print(data_returned) if data_returned['status'] == 'success': if 'data' in data_returned: return data_returned['data'] @@ -124,6 +124,14 @@ def unfollow_repository_by_id(self, project_id: str): } self._make_lgtm_post(url, data) + # Given a project id, unfollow the protoproject + def unfollow_proto_repository_by_id(self, project_id: str): + url = "https://lgtm.com/internal_api/v0.2/unfollowProtoproject" + data = { + 'protoproject_key': project_id, + } + self._make_lgtm_post(url, data) + def unfollow_repository(self, simple_project: 'SimpleProject'): url = "https://lgtm.com/internal_api/v0.2/unfollowProject" if not simple_project.is_protoproject \ else "https://lgtm.com/internal_api/v0.2/unfollowProtoproject" From 88e37933a13166b4ba2b885ac3d300faeb3fac79 Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 18 Feb 2021 20:28:33 -0500 Subject: [PATCH 20/57] Save more project data to cache files --- follow_repos_by_search_term.py | 22 +++++++++++++--------- follow_top_repos_by_star_count.py | 19 +++++++++++-------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index d36479c..63b95f9 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -23,11 +23,12 @@ def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict: def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str]: github = utils.github_api.create() site = LGTMSite.create_from_file() - saved_project_ids: List[str] = [] + saved_project_data: List[str] = [] for date_range in utils.github_dates.generate_dates(): - repos = github.search_repositories(query=f'language:{language} fork:false created:{date_range} {search_term}') + repos = github.search_repositories(query=f'stars:>5 language:{language} fork:false created:{date_range} {search_term}') + # TODO: This occasionally returns requests.exceptions.ConnectionError 
which is annoying as hell. It would be nice if we built in exception handling. for repo in repos: # Github has rate limiting in place hence why we add a sleep here. More info can be found here: # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting @@ -38,15 +39,18 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] saved_project = save_project_to_lgtm(site, repo.full_name) + # TODO: This process is duplicated elsewhere and should be under one location if "realProject" in saved_project: - saved_project_id = saved_project['realProject'][0]['displayName'] - saved_project_ids.append(saved_project_id) + saved_project_name = saved_project['realProject'][0]['displayName'] + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_data.append(f'{saved_project_name},{saved_project_id},realProject') if "protoproject" in saved_project: - saved_project_id = saved_project['protoproject']['displayName'] - saved_project_ids.append(saved_project_id) + saved_project_name = saved_project['protoproject']['displayName'] + saved_project_id = saved_project['protoproject']['key'] + saved_project_data.append(f'{saved_project_name},{saved_project_id},protoproject') - return saved_project_ids + return saved_project_data if len(sys.argv) < 3: print("Please make sure you provided a language and search term") @@ -56,10 +60,10 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] search_term = sys.argv[2] print(f'Following repos for the {language} language that contain the \'{search_term}\' search term.') -saved_project_ids = find_and_save_projects_to_lgtm(language, search_term) +saved_project_data = find_and_save_projects_to_lgtm(language, search_term) # If the user provided a second arg then they want to create a custom list. 
if len(sys.argv) <= 4: # print custom_list_name = sys.argv[3] - utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name) + utils.cacher.write_project_data_to_file(saved_project_data, custom_list_name) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 44abe8d..b434ce5 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -48,7 +48,7 @@ def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict: def find_and_save_projects_to_lgtm(language: str) -> List[str]: github = utils.github_api.create() site = LGTMSite.create_from_file() - saved_project_ids: List[str] = [] + saved_project_data: List[str] = [] for date_range in utils.github_dates.generate_dates(): repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}') @@ -63,15 +63,18 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: saved_project = save_project_to_lgtm(site, repo.full_name) + # TODO: This process is duplicated elsewhere and should be under one location if "realProject" in saved_project: - saved_project_id = saved_project['realProject'][0]['displayName'] - saved_project_ids.append(saved_project_id) + saved_project_name = saved_project['realProject'][0]['displayName'] + saved_project_id = saved_project['realProject'][0]['key'] + saved_project_data.append(f'{saved_project_name},{saved_project_id},realProject') if "protoproject" in saved_project: - saved_project_id = saved_project['protoproject']['displayName'] - saved_project_ids.append(saved_project_id) + saved_project_name = saved_project['protoproject']['displayName'] + saved_project_id = saved_project['protoproject']['key'] + saved_project_data.append(f'{saved_project_name},{saved_project_id},protoproject') - return saved_project_ids + return saved_project_data if len(sys.argv) < 2: print("Please provide a language you want to search") @@ -80,10 +83,10 @@ def 
find_and_save_projects_to_lgtm(language: str) -> List[str]: language = sys.argv[1].capitalize() print('Following the top repos for %s' % language) -saved_project_ids = find_and_save_projects_to_lgtm(language) +saved_project_data = find_and_save_projects_to_lgtm(language) # If the user provided a second arg then they want to create a custom list. if len(sys.argv) <= 3: # print custom_list_name = sys.argv[2] - utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name) + utils.cacher.write_project_data_to_file(saved_project_data, custom_list_name) From f515563fd94aa338fc0dfbd006014f53b6fba90c Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 18 Feb 2021 20:28:59 -0500 Subject: [PATCH 21/57] Refactor how we move repos to LGTM lists --- move_repos_to_lgtm_lists.py | 72 ++++++++++++------------------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 73f35ed..1dd9f9e 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -1,65 +1,39 @@ from typing import List from lgtm import LGTMSite +import utils.cacher import os +import time -def get_project_list_id(cached_file_name: str) -> str: +def get_project_list_id(cached_file_name: str, site: 'LGTMSite') -> str: project_list_name = cached_file_name.split(".")[0] - # We want to find or create a project list based on the the name of - # the text file that holds all of the projects we are currently following. return site.get_or_create_project_list(project_list_name) -def get_project_ids(cached_file: str) -> List[str]: - file = open(cached_file, "r") - - project_ids = file.read().split("\n") - - # remove any "" in the array - while("" in project_ids): - project_ids.remove("") - - return project_ids - -def cleanup(file_name: str): - # Since we are done with the file, we can now delete it from the cache. 
- os.remove(file_name) - -def unfollow_projects(project_ids: List[str]): - for project_id in project_ids: - # The last thing we need to do is tidy up and unfollow all the repositories - # we just added to our project list. - site.unfollow_repository_by_id(project_id) - -def process_cached_file(cached_file_name): +def process_cached_file(cached_file_name: str, site: 'LGTMSite'): cached_file = "cache/" + cached_file_name - project_list_id = get_project_list_id(cached_file_name) - project_ids = get_project_ids(cached_file) - - # With the project list id and the project ids, we now want to save the repos - # we currently follow to the project list - site.load_into_project_list(project_list_id, project_ids) + project_builds = utils.cacher.get_project_builds(cached_file) + followed_projects = site.get_my_projects() - unfollow_projects(project_ids) - cleanup(cached_file) + if project_builds.build_processes_in_progress(followed_projects): + print(f'The {cached_file_name} can\'t be processed at this time because a project build is still in progress.') + return -site = LGTMSite.create_from_file() + project_list_id = get_project_list_id(cached_file_name, site) + print("Moving followed projects to the project list") -projects = site.get_my_projects() + # site.load_into_project_list(project_list_id, project_builds.return_successful_project_builds(site)) + + # If a project fails to be processed by LGTM, we still unfollow the project. + print("Unfollowing projects") + project_builds.unfollow_projects(site) + print("Removing the cache file.") + utils.cacher.remove_file(cached_file) + print("Done processing cache file.") -# build_still_in_progess = False -# -# for project in projects: -# if project.get('protoproject') is not None and project.get('protoproject')['displayName']: -# if project['protoproject']['state'] == "build_attempt_in_progress": -# build_still_in_progess = True -# break -# -# if build_still_in_progess: -# print("There are projects still being processed by LGTM. 
It's not safe ") -# return +site = LGTMSite.create_from_file() -cached_file_names = os.listdir("cache") +for cached_file_name in os.listdir("cache"): + process_cached_file(cached_file_name, site) -for cached_file_name in cached_file_names: - process_cached_file(cached_file_name) +print("Finished!") From a853b9821be9980b580559594e5db947f414047a Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 18 Feb 2021 20:29:20 -0500 Subject: [PATCH 22/57] Update README with LGTM build process info --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index b41bd09..1e756be 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,13 @@ commands. This CUSTOM_LIST_NAME represents the name of a LGTM project list that Note: When naming a project custom list name, please use alphanumeric, dashes, and underscore characters only. +### Build Processes By LGTM +LGTM can't move projects that are being processed into custom lists. To resolve this, we've added a check that confirms whether or not all projects you plan on moving to a custom list are processed. If a project isn't processed, we will not move any projects into the custom list and you'll receive the following error: + +> The can't be processed at this time because a project build is still in progress. + +If you receive this error, wait a few hours and run the script again. 
+ ## Legal The author of this script assumes no liability for your use of this project, including, From 01842f714088bddfffcdc62f2e8aba8b2f18da47 Mon Sep 17 00:00:00 2001 From: Thank You Date: Sun, 21 Feb 2021 18:59:54 -0500 Subject: [PATCH 23/57] Add Python documentation for functions --- lgtm.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/lgtm.py b/lgtm.py index 0927e6b..0bf6feb 100644 --- a/lgtm.py +++ b/lgtm.py @@ -35,8 +35,14 @@ def _make_lgtm_get(self, url: str) -> dict: ) return r.json() - # Retrieves a user's projects def get_my_projects(self) -> List[dict]: + ''' + Returns a user's projects. + + Returns: + data (List[dict]): Response data from LGTM + ''' + url = 'https://lgtm.com/internal_api/v0.2/getMyProjects?apiVersion=' + self.api_version data = self._make_lgtm_get(url) if data['status'] == 'success': @@ -44,13 +50,32 @@ def get_my_projects(self) -> List[dict]: else: raise LGTMRequestException('LGTM GET request failed with response: %s' % str(data)) - # Given an org name, retrieve a user's projects under that org def get_my_projects_under_org(self, org: str) -> List['SimpleProject']: + ''' + Given an org name, returns a user's projects that are part of an org. + + Parameters: + org (str): An organization + + Returns: + projects (['SimpleProject']): List of SimpleProject's from LGTM part of an org. + ''' + projects_sorted = LGTMDataFilters.org_to_ids(self.get_my_projects()) return LGTMDataFilters.extract_project_under_org(org, projects_sorted) - # This method handles making a POST request to the LGTM server def _make_lgtm_post(self, url: str, data: dict) -> dict: + ''' + Makes a HTTP post request to LGTM.com + + Parameters: + url (str): A URL representing where the HTTP request goes + data (dict): Data that will be sent to LGTM.com in the request.. + + Returns: + data (dict): Data returned from LGTM.com response. 
+ ''' + api_data = { 'apiVersion': self.api_version } @@ -77,8 +102,15 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: else: raise LGTMRequestException('LGTM POST request failed with response: %s' % str(data_returned)) - # Given a project list id and a list of project ids, add the projects to the project list. def load_into_project_list(self, into_project: int, lgtm_project_ids: List[str]): + ''' + Given a project list id and a list of project ids, add the projects to the project list on LGTM.com. + + Parameters: + into_project (int): Project list id + lgtm_project_ids (List[str]): List of project ids + ''' + url = "https://lgtm.com/internal_api/v0.2/updateProjectSelection" # Because LGTM uses some wacky format for it's application/x-www-form-urlencoded data list_serialized = ', '.join([('"' + str(elem) + '"') for elem in lgtm_project_ids]) @@ -116,16 +148,28 @@ def follow_repository(self, repository_url: str) -> dict: } return self._make_lgtm_post(url, data) - # Given a project id, unfollow the project def unfollow_repository_by_id(self, project_id: str): + ''' + Given a project id, unfollows a repository. + + Parameters: + project_id (str): A project id + ''' + url = "https://lgtm.com/internal_api/v0.2/unfollowProject" data = { 'project_key': project_id, } self._make_lgtm_post(url, data) - # Given a project id, unfollow the protoproject def unfollow_proto_repository_by_id(self, project_id: str): + ''' + Given a project id, unfollows the proto repository. 
+ + Parameters: + project_id (str): A project id + ''' + url = "https://lgtm.com/internal_api/v0.2/unfollowProtoproject" data = { 'protoproject_key': project_id, From 2313cc363396d57d22f76d80e75778da07166877 Mon Sep 17 00:00:00 2001 From: Thank You Date: Sun, 21 Feb 2021 21:47:21 -0500 Subject: [PATCH 24/57] Add comment --- follow_repos_by_search_term.py | 2 ++ follow_top_repos_by_star_count.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index 63b95f9..ea5fa51 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -40,6 +40,8 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] saved_project = save_project_to_lgtm(site, repo.full_name) # TODO: This process is duplicated elsewhere and should be under one location + # We only save realProjects to the cache since those are the only + # ones we can actually process. if "realProject" in saved_project: saved_project_name = saved_project['realProject'][0]['displayName'] saved_project_id = saved_project['realProject'][0]['key'] diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index b434ce5..8128f67 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -11,7 +11,6 @@ # lgtm followed projects into their own lists. this script will take the text file name # and use that to create a list. it will then move the lgtm projects into that list and # unfollow them from the lgtm list. this script can be used universally. 
-# # - explicit changes: # - current scripts: # - each script must now accept a list arg that represents the list name that you want From 32d4fd91c135c72c779501457f720f3a7e87040a Mon Sep 17 00:00:00 2001 From: Thank You Date: Sun, 21 Feb 2021 21:47:31 -0500 Subject: [PATCH 25/57] Remove unnecessary comments --- follow_top_repos_by_star_count.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 8128f67..14fa0ac 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -1,27 +1,3 @@ -# ## How the script currently works: -# - We first get all the github repos. -# - We then take each repo and follow the repository in lgtm -# -# -# ## changes that need to be made -# - Since we are adding lists to lgtm, we also need to store someplace every -# repo that we added to lgtm. -# - once teh script is done the list of lgtm saved projects will be stored in a txt file -# - after a period of time, say 24 hrs, we then run a companion script that moves -# lgtm followed projects into their own lists. this script will take the text file name -# and use that to create a list. it will then move the lgtm projects into that list and -# unfollow them from the lgtm list. this script can be used universally. -# - explicit changes: -# - current scripts: -# - each script must now accept a list arg that represents the list name that you want -# your repos to be saved to. 
-# - each script must now add the lgtm project id to a file that stores repos (txt file) -# - new script: -# - we need a script that will take a text file, loop through the text file, -# and for each item in the text file add the item to the lgtm list (the list name -# is derived from the name of the ext file) -# - from typing import List from lgtm import LGTMSite From 6c825f6241ed64d1787bdf4ecb0c9d8f3541aa32 Mon Sep 17 00:00:00 2001 From: Thank You Date: Sun, 21 Feb 2021 21:48:45 -0500 Subject: [PATCH 26/57] Add guard clauses and improved project filtering --- utils/cacher.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/utils/cacher.py b/utils/cacher.py index 64f6c62..b423dfe 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,6 +1,7 @@ from typing import List import os import time +from lgtm import LGTMSite class ProjectBuild: def __init__(self, project: dict): @@ -16,6 +17,16 @@ def protoproject(self) -> bool: return self.type == "protoproject" def build_successful(self, followed_projects: List[dict]) -> bool: + if self.protoproject: + # A throttle that although may not be necessary a nice plus. + time.sleep(1) + site = LGTMSite.create_from_file() + data = site.retrieve_project(self.name) + + # A failed protoproject build will always be intrepreted to LGTM as a project that can't be found. + if 'code' in data and data['code'] == 404: + return False + # I don't know the name of the build successful status. 
return ( not self.build_in_progress(followed_projects) and @@ -47,7 +58,10 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: def project_part_of_cache(self, followed_projects: List[dict]) -> bool: part_of_cache = False for project in followed_projects: - if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name: + if ( + project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name or + project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.name + ): part_of_cache = True break @@ -63,6 +77,12 @@ def unfollow_projects(self, site: 'LGTMSite'): if project.realProject(): site.unfollow_repository_by_id(project.id) else: + data = site.retrieve_project(project.name) + + # A failed protoproject build will always be intrepreted to LGTM as a project that can't be found. + if 'code' in data and data['code'] == 404: + return + site.unfollow_proto_repository_by_id(project.id) def return_successful_project_builds(self, site: 'LGTMSite') -> List[str]: From 39229730f7b9e68cc1495855faaacd19de09cbfe Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 13:32:36 -0500 Subject: [PATCH 27/57] Increase timer --- utils/cacher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cacher.py b/utils/cacher.py index b423dfe..8ea737a 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -19,7 +19,7 @@ def protoproject(self) -> bool: def build_successful(self, followed_projects: List[dict]) -> bool: if self.protoproject: # A throttle that although may not be necessary a nice plus. 
- time.sleep(1) + time.sleep(2) site = LGTMSite.create_from_file() data = site.retrieve_project(self.name) From a3cf8e20d22a87b172c0b08cfd55f9666f37c731 Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 13:34:23 -0500 Subject: [PATCH 28/57] Uncomment code --- move_repos_to_lgtm_lists.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/move_repos_to_lgtm_lists.py b/move_repos_to_lgtm_lists.py index 1dd9f9e..61a8e64 100644 --- a/move_repos_to_lgtm_lists.py +++ b/move_repos_to_lgtm_lists.py @@ -3,7 +3,6 @@ import utils.cacher import os -import time def get_project_list_id(cached_file_name: str, site: 'LGTMSite') -> str: project_list_name = cached_file_name.split(".")[0] @@ -20,10 +19,10 @@ def process_cached_file(cached_file_name: str, site: 'LGTMSite'): return project_list_id = get_project_list_id(cached_file_name, site) + print("Moving followed projects to the project list") + site.load_into_project_list(project_list_id, project_builds.return_successful_project_builds(site)) - # site.load_into_project_list(project_list_id, project_builds.return_successful_project_builds(site)) - # If a project fails to be processed by LGTM, we still unfollow the project. print("Unfollowing projects") project_builds.unfollow_projects(site) From 50fc91e9676b63f5e8819fe7bfa146ed5c708232 Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 13:59:06 -0500 Subject: [PATCH 29/57] Remove unnecessary comment --- follow_repos_by_search_term.py | 1 - follow_top_repos_by_star_count.py | 1 - 2 files changed, 2 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index ea5fa51..f4e3f4c 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -66,6 +66,5 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] # If the user provided a second arg then they want to create a custom list. 
if len(sys.argv) <= 4: - # print custom_list_name = sys.argv[3] utils.cacher.write_project_data_to_file(saved_project_data, custom_list_name) diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 14fa0ac..0184906 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -62,6 +62,5 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: # If the user provided a second arg then they want to create a custom list. if len(sys.argv) <= 3: - # print custom_list_name = sys.argv[2] utils.cacher.write_project_data_to_file(saved_project_data, custom_list_name) From 2cd04b5b07878e484cb9c7e6290fcf4c8de93d47 Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 14:01:20 -0500 Subject: [PATCH 30/57] Add HTTP retries During testing I've noticed the HTTP request fails on occasion. This unfortunately halts the entire script. To resolve this, I'm adding an HTTP retry mechanism for POST requests. The idea is to allow for retries to occur when we get 5XX codes. A small issue that may occur is that LGTM is not idempotent. That said, I think that for our use case we should be fine.
--- lgtm.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lgtm.py b/lgtm.py index 0bf6feb..0834509 100644 --- a/lgtm.py +++ b/lgtm.py @@ -3,7 +3,8 @@ import requests import yaml - +from urllib3.util.retry import Retry +from requests.adapters import HTTPAdapter class LGTMRequestException(Exception): pass @@ -81,12 +82,22 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: } full_data = {**api_data, **data} # print(data) - r = requests.post( + + session = requests.Session() + + retries = Retry(total=3, + backoff_factor=0.1, + status_forcelist=[ 500, 502, 503, 504 ]) + + session.mount('https://', HTTPAdapter(max_retries=retries)) + + r = session.post( url, full_data, cookies=self._cookies(), headers=self._headers() ) + try: data_returned = r.json() except ValueError as e: From c8e33ae4b1e588b0b693be530ab52250fa8da505 Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 14:01:57 -0500 Subject: [PATCH 31/57] Remove unnecessary prints --- lgtm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lgtm.py b/lgtm.py index 0834509..2939ceb 100644 --- a/lgtm.py +++ b/lgtm.py @@ -81,7 +81,6 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: 'apiVersion': self.api_version } full_data = {**api_data, **data} - # print(data) session = requests.Session() @@ -104,7 +103,6 @@ def _make_lgtm_post(self, url: str, data: dict) -> dict: response_text = r.text raise LGTMRequestException(f'Failed to parse JSON. 
Response was: {response_text}') from e - # print(data_returned) if data_returned['status'] == 'success': if 'data' in data_returned: return data_returned['data'] From 58b4d1ec30780a45a934848a980b18d867408756 Mon Sep 17 00:00:00 2001 From: Thank You Date: Mon, 22 Feb 2021 21:10:22 -0500 Subject: [PATCH 32/57] Fix various issues with moving repos to lists --- utils/cacher.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index 8ea737a..85c7234 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -27,7 +27,6 @@ def build_successful(self, followed_projects: List[dict]) -> bool: if 'code' in data and data['code'] == 404: return False - # I don't know the name of the build successful status. return ( not self.build_in_progress(followed_projects) and not self.build_failed(followed_projects) @@ -49,10 +48,19 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: in_state = False for project in followed_projects: - if project.get('protoproject') is not None and project.get('protoproject')['state'] == state: + if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name and project.get('protoproject')['state'] == state: in_state = True break + # Real projects always have successful builds, or at least as far as I can tell. 
+ if project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.name: + if state == "build_attempt_in_progress" or state == "build_attempt_failed": + in_state == False + else: + in_state = True + + break + return in_state def project_part_of_cache(self, followed_projects: List[dict]) -> bool: @@ -74,6 +82,7 @@ def __init__(self, projects: List[ProjectBuild]): def unfollow_projects(self, site: 'LGTMSite'): for project in self.projects: time.sleep(1) + if project.realProject(): site.unfollow_repository_by_id(project.id) else: @@ -81,7 +90,7 @@ def unfollow_projects(self, site: 'LGTMSite'): # A failed protoproject build will always be intrepreted to LGTM as a project that can't be found. if 'code' in data and data['code'] == 404: - return + continue site.unfollow_proto_repository_by_id(project.id) From 08f1b7c82de68514f4104056ebeae9a0ec513988 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 24 Feb 2021 13:40:06 -0500 Subject: [PATCH 33/57] Add HTTP retries when retrieving a project --- lgtm.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lgtm.py b/lgtm.py index 2939ceb..9540143 100644 --- a/lgtm.py +++ b/lgtm.py @@ -246,7 +246,16 @@ def add_org_to_project_list_by_list_name(self, org: str, project_name: str): @staticmethod def retrieve_project(gh_project_path: str): url = "https://lgtm.com/api/v1.0/projects/g/" + gh_project_path - r = requests.get(url) + + session = requests.Session() + + retries = Retry(total=3, + backoff_factor=0.1, + status_forcelist=[ 500, 502, 503, 504 ]) + + session.mount('https://', HTTPAdapter(max_retries=retries)) + + r = session.get(url) return r.json() @staticmethod From 429c9bab99dfd552dd827dd8da57ca2b27e9553e Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 24 Feb 2021 13:45:49 -0500 Subject: [PATCH 34/57] Add check for protoprojects There isn't an easy way to see if a protoproject is eligible to be moved over to a custom list. 
To resolve this, we are checking to see if the project's language status is a success. This tells us that the protoproject succeeded in a build and is eligible to be moved. Also, we are updating the self.id here. This is because of how LGTM treats protoprojects. A protoproject has a different type of id than a real project. However, as soon as a protoproject build succeeds, LGTM updates the protoproject's id. This is frustrating as it invalidates our cache record of the protoproject id. So to resolve this we update the protoproject id programmatically in-memory. It's nasty but it works. --- utils/cacher.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/utils/cacher.py b/utils/cacher.py index 85c7234..b92ada5 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -27,6 +27,13 @@ def build_successful(self, followed_projects: List[dict]) -> bool: if 'code' in data and data['code'] == 404: return False + # In this case, the protoproject likely succeeded. To confirm this, + # we check the language status to confirm the build succeeded.
+ for language in data['languages']: + if language['status'] == "success": + self.id = data['id'] + return True + return ( not self.build_in_progress(followed_projects) and not self.build_failed(followed_projects) From ba0e6f445dd269d08142403af9fc1d2a8e4ca195 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 13:28:46 -0500 Subject: [PATCH 35/57] Handle exceptions from LGTM --- utils/cacher.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index b92ada5..646a34a 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,7 +1,7 @@ from typing import List import os import time -from lgtm import LGTMSite +from lgtm import LGTMSite, LGTMRequestException class ProjectBuild: def __init__(self, project: dict): @@ -88,18 +88,38 @@ def __init__(self, projects: List[ProjectBuild]): def unfollow_projects(self, site: 'LGTMSite'): for project in self.projects: - time.sleep(1) + time.sleep(2) if project.realProject(): - site.unfollow_repository_by_id(project.id) + self.unfollow_real_project(project.id) else: data = site.retrieve_project(project.name) - # A failed protoproject build will always be intrepreted to LGTM as a project that can't be found. + # A failed protoproject build will always be intrepreted to LGTM + # as a project that can't be found. if 'code' in data and data['code'] == 404: continue - site.unfollow_proto_repository_by_id(project.id) + self.unfollow_proto_project(data['id']) + + def unfollow_proto_project(id: int): + try: + time.sleep(2) + + site.unfollow_proto_repository_by_id(id) + except LGTMRequestException as e: + # In some cases even though we've recorded the project as a protoproject + # it's actually a realproject. So we can't unfollow it via a proto-project + # unfollow API call. We can however unfollow it via the real project API call. 
+ self.unfollow_real_project(id) + + def unfollow_real_project(id: int): + try: + time.sleep(2) + + site.unfollow_repository_by_id(data['id']) + except LGTMRequestException as e: + print(f"An unknown issue occurred unfollowing {project.name}") def return_successful_project_builds(self, site: 'LGTMSite') -> List[str]: filtered_project_ids: List[str] = [] From 85b368e17adae4e012a652b77cffd49e574a7419 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 13:29:37 -0500 Subject: [PATCH 36/57] Delete test.py --- test.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 90e39df..0000000 --- a/test.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import List -import sys -import os - -print(len(sys.argv)) - - -# -# projects: List[str] = [] -# -# print(sys.argv) - -# from lgtm import LGTMSite -# lgtm_site = LGTMSite.create_from_file() -# -# repo_url: str = 'https://github.com/google/jax' -# -# result = lgtm_site.follow_repository(repo_url) -# print("1111111111") -# print("1111111111") -# print("1111111111") -# print("1111111111") -# print("1111111111") -# project_id = result['realProject'][0]['key'] -# print(project_id) -# -# print("1111111111") -# print("1111111111") -# print("1111111111") -# print("1111111111") -# project_list_id = lgtm_site.get_or_create_project_list("test_project_16") -# print(project_list_id) -# -# lgtm_site.load_into_project_list(project_list_id, [project_id]) -# -# lgtm_site.unfollow_repository_by_id(project_id) From cbe5fa5b09ef62745e0ea2a22a262db12a22106b Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 13:39:26 -0500 Subject: [PATCH 37/57] Clarify API call to LGTM --- lgtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lgtm.py b/lgtm.py index 9540143..33ebe65 100644 --- a/lgtm.py +++ b/lgtm.py @@ -38,7 +38,7 @@ def _make_lgtm_get(self, url: str) -> dict: def get_my_projects(self) -> List[dict]: 
''' - Returns a user's projects. + Returns a user's followed projects that are not in a custom list. Returns: data (List[dict]): Response data from LGTM From 1e40f13f758961f4bd710eb0a12b8f2702d4c4a7 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:02:07 -0500 Subject: [PATCH 38/57] Refactor how we build SimpleProjects I refactored the SimpleProject and LGTMDataFilters section in order to take advantage of some code that determines whether a project is a real project or a proto project. I also added two attributes to SimpleProject to get the refactor complete. --- lgtm.py | 108 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/lgtm.py b/lgtm.py index 33ebe65..ec9d0de 100644 --- a/lgtm.py +++ b/lgtm.py @@ -284,6 +284,8 @@ class SimpleProject: display_name: str key: str is_protoproject: bool + is_valid_project: bool + org: str def make_post_data(self): data_dict_key = 'protoproject_key' if self.is_protoproject else 'project_key' @@ -302,43 +304,42 @@ def org_to_ids(projects: List[Dict]) -> Dict[str, List[SimpleProject]]: """ org_to_ids = {} for project in projects: - org: str - display_name: str - key: str - is_protoproject: bool - if 'protoproject' in project: - the_project = project['protoproject'] - if 'https://github.com/' not in the_project['cloneUrl']: - # Not really concerned with BitBucket right now - continue - display_name = the_project['displayName'] - org = display_name.split('/')[0] - key = the_project['key'] - is_protoproject = True - elif 'realProject' in project: - - the_project = project['realProject'][0] - if the_project['repoProvider'] != 'github_apps': - # Not really concerned with BitBucket right now - continue - org = str(the_project['slug']).split('/')[1] - display_name = the_project['displayName'] - key = the_project['key'] - is_protoproject = False - else: - raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) + simple_project = 
LGTMDataFilters.build_simple_project(project) + if not simple_project.is_valid_project: + continue + # org: str + # display_name: str + # key: str + # is_protoproject: bool + # if 'protoproject' in project: + # the_project = project['protoproject'] + # if 'https://github.com/' not in the_project['cloneUrl']: + # # Not really concerned with BitBucket right now + # continue + # display_name = the_project['displayName'] + # org = display_name.split('/')[0] + # key = the_project['key'] + # is_protoproject = True + # elif 'realProject' in project: + # + # the_project = project['realProject'][0] + # if the_project['repoProvider'] != 'github_apps': + # # Not really concerned with BitBucket right now + # continue + # org = str(the_project['slug']).split('/')[1] + # display_name = the_project['displayName'] + # key = the_project['key'] + # is_protoproject = False + # else: + # raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) ids_list: List[SimpleProject] - if org in org_to_ids: - ids_list = org_to_ids[org] + if simple_project.org in org_to_ids: + ids_list = org_to_ids[simple_project.org] else: ids_list = [] - org_to_ids[org] = ids_list - ids_list.append(SimpleProject( - display_name=display_name, - key=key, - is_protoproject=is_protoproject - )) + org_to_ids[simple_project.org] = ids_list + ids_list.append(simple_project) return org_to_ids @@ -348,3 +349,44 @@ def extract_project_under_org(org: str, projects_sorted: Dict[str, List[SimplePr print('org %s not found in projects list' % org) return [] return projects_sorted[org] + + @staticmethod + def build_simple_project(project: dict) -> SimpleProject: + org: str + display_name: str + key: str + is_protoproject: bool + is_valid_project: bool = True + + if 'protoproject' in project: + the_project = project['protoproject'] + if 'https://github.com/' not in the_project['cloneUrl']: + # Not really concerned with BitBucket right now + is_valid_project = False + display_name = the_project['displayName'] 
+ org = display_name.split('/')[0] + key = the_project['key'] + is_protoproject = True + elif 'realProject' in project: + + the_project = project['realProject'][0] + if the_project['repoProvider'] != 'github_apps': + # Not really concerned with BitBucket right now + is_valid_project = False + org = str(the_project['slug']).split('/')[1] + display_name = the_project['displayName'] + key = the_project['key'] + is_protoproject = False + else: + # I don't know why this is here. Considering we have a new setup where + # we check the object to see if it's a valid project, could we not just + # set is_valid_project to False and let the code elsewhere catch that? + raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) + + return SimpleProject( + display_name=display_name, + key=key, + is_protoproject=is_protoproject, + is_valid_project=is_valid_project, + org=org + ) From aa14305eec725941dc31fd9e308fc897f3d0814f Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:06:40 -0500 Subject: [PATCH 39/57] Rename method --- utils/cacher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index 646a34a..35d4816 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -41,13 +41,13 @@ def build_successful(self, followed_projects: List[dict]) -> bool: def build_in_progress(self, followed_projects: List[dict]) -> bool: return ( - self.project_part_of_cache(followed_projects) and + self.project_currently_followed(followed_projects) and self.project_state("build_attempt_in_progress", followed_projects) ) def build_failed(self, followed_projects: List[dict]) -> bool: return ( - self.project_part_of_cache(followed_projects) and + self.project_currently_followed(followed_projects) and self.project_state("build_attempt_failed", followed_projects) ) @@ -70,7 +70,7 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: return in_state - def project_part_of_cache(self, 
followed_projects: List[dict]) -> bool: + def project_currently_followed(self, followed_projects: List[dict]) -> bool: part_of_cache = False for project in followed_projects: if ( From bedc587648ac7bbafb684d55a8a787718148dfa8 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:17:38 -0500 Subject: [PATCH 40/57] Remove useless code --- lgtm.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/lgtm.py b/lgtm.py index ec9d0de..0174778 100644 --- a/lgtm.py +++ b/lgtm.py @@ -307,31 +307,6 @@ def org_to_ids(projects: List[Dict]) -> Dict[str, List[SimpleProject]]: simple_project = LGTMDataFilters.build_simple_project(project) if not simple_project.is_valid_project: continue - # org: str - # display_name: str - # key: str - # is_protoproject: bool - # if 'protoproject' in project: - # the_project = project['protoproject'] - # if 'https://github.com/' not in the_project['cloneUrl']: - # # Not really concerned with BitBucket right now - # continue - # display_name = the_project['displayName'] - # org = display_name.split('/')[0] - # key = the_project['key'] - # is_protoproject = True - # elif 'realProject' in project: - # - # the_project = project['realProject'][0] - # if the_project['repoProvider'] != 'github_apps': - # # Not really concerned with BitBucket right now - # continue - # org = str(the_project['slug']).split('/')[1] - # display_name = the_project['displayName'] - # key = the_project['key'] - # is_protoproject = False - # else: - # raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) ids_list: List[SimpleProject] if simple_project.org in org_to_ids: From e362ef832cd47402ea8dd62f57287ceffd539589 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:18:20 -0500 Subject: [PATCH 41/57] Rename ProjectBuild#name and refactor code --- utils/cacher.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index 
35d4816..0e92335 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,12 +1,14 @@ from typing import List import os import time -from lgtm import LGTMSite, LGTMRequestException +from lgtm import LGTMSite, LGTMRequestException, LGTMDataFilters +# This is very similar to SimpleProject. If I had discovered SimpleProject earlier +# I would have built this code around that. class ProjectBuild: def __init__(self, project: dict): self.project = project - self.name = project["name"] + self.display_name = project["display_name"] self.id = project["id"] self.type = project["type"] @@ -21,7 +23,7 @@ def build_successful(self, followed_projects: List[dict]) -> bool: # A throttle that although may not be necessary a nice plus. time.sleep(2) site = LGTMSite.create_from_file() - data = site.retrieve_project(self.name) + data = site.retrieve_project(self.display_name) # A failed protoproject build will always be intrepreted to LGTM as a project that can't be found. if 'code' in data and data['code'] == 404: @@ -55,12 +57,12 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: in_state = False for project in followed_projects: - if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name and project.get('protoproject')['state'] == state: + if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.display_name and project.get('protoproject')['state'] == state: in_state = True break # Real projects always have successful builds, or at least as far as I can tell. 
- if project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.name: + if project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.display_name: if state == "build_attempt_in_progress" or state == "build_attempt_failed": in_state == False else: @@ -71,16 +73,15 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: return in_state def project_currently_followed(self, followed_projects: List[dict]) -> bool: - part_of_cache = False + part_of_followed_projects = False for project in followed_projects: - if ( - project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.name or - project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.name - ): - part_of_cache = True + simple_project = LGTMDataFilters.build_simple_project(project) + + if (simple_project.display_name == self.display_name): + part_of_followed_projects = True break - return part_of_cache + return part_of_followed_projects class ProjectBuilds: def __init__(self, projects: List[ProjectBuild]): @@ -93,7 +94,7 @@ def unfollow_projects(self, site: 'LGTMSite'): if project.realProject(): self.unfollow_real_project(project.id) else: - data = site.retrieve_project(project.name) + data = site.retrieve_project(project.display_name) # A failed protoproject build will always be intrepreted to LGTM # as a project that can't be found. 
@@ -119,7 +120,7 @@ def unfollow_real_project(id: int): site.unfollow_repository_by_id(data['id']) except LGTMRequestException as e: - print(f"An unknown issue occurred unfollowing {project.name}") + print(f"An unknown issue occurred unfollowing {project.display_name}") def return_successful_project_builds(self, site: 'LGTMSite') -> List[str]: filtered_project_ids: List[str] = [] @@ -165,7 +166,7 @@ def get_project_builds(cached_file: str) -> ProjectBuilds: for i, project in enumerate(project_data): project_data[i] = ProjectBuild({ - "name": project.split(",")[0], + "display_name": project.split(",")[0], "id": project.split(",")[1], "type": project.split(",")[2], }) From fda2a9f2984b41586017a3d54beefce9e7dfb550 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:27:46 -0500 Subject: [PATCH 42/57] Add SimpleProject#project_type method --- lgtm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lgtm.py b/lgtm.py index 0174778..2e2210f 100644 --- a/lgtm.py +++ b/lgtm.py @@ -293,6 +293,11 @@ def make_post_data(self): data_dict_key: self.key } + def project_type(self) -> str: + if is_protoproject: + return 'protoproject' + elif not is_protoproject: + return 'realProject' class LGTMDataFilters: From b51fced324b8cac31b0049df023a4bb15a34f129 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:28:13 -0500 Subject: [PATCH 43/57] Continue refactoring how we determine LGTM project types --- follow_repos_by_search_term.py | 18 +++++------------- follow_top_repos_by_star_count.py | 15 ++++----------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index f4e3f4c..efbd401 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -1,5 +1,5 @@ from typing import List -from lgtm import LGTMSite +from lgtm import LGTMSite, LGTMDataFilters import utils.cacher import utils.github_dates @@ -39,18 +39,10 @@ def find_and_save_projects_to_lgtm(language: 
str, search_term: str) -> List[str] saved_project = save_project_to_lgtm(site, repo.full_name) - # TODO: This process is duplicated elsewhere and should be under one location - # We only save realProjects to the cache since those are the only - # ones we can actually process. - if "realProject" in saved_project: - saved_project_name = saved_project['realProject'][0]['displayName'] - saved_project_id = saved_project['realProject'][0]['key'] - saved_project_data.append(f'{saved_project_name},{saved_project_id},realProject') - - if "protoproject" in saved_project: - saved_project_name = saved_project['protoproject']['displayName'] - saved_project_id = saved_project['protoproject']['key'] - saved_project_data.append(f'{saved_project_name},{saved_project_id},protoproject') + simple_project = LGTMDataFilters.build_simple_project(saved_project) + + if simple_project.is_valid_project: + saved_project_data.append(f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}') return saved_project_data diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 0184906..62d7f2c 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -1,5 +1,5 @@ from typing import List -from lgtm import LGTMSite +from lgtm import LGTMSite, LGTMDataFilters import utils.github_dates import utils.github_api @@ -37,17 +37,10 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: continue saved_project = save_project_to_lgtm(site, repo.full_name) + simple_project = LGTMDataFilters.build_simple_project(saved_project) - # TODO: This process is duplicated elsewhere and should be under one location - if "realProject" in saved_project: - saved_project_name = saved_project['realProject'][0]['displayName'] - saved_project_id = saved_project['realProject'][0]['key'] - saved_project_data.append(f'{saved_project_name},{saved_project_id},realProject') - - if "protoproject" in saved_project: - saved_project_name = 
saved_project['protoproject']['displayName'] - saved_project_id = saved_project['protoproject']['key'] - saved_project_data.append(f'{saved_project_name},{saved_project_id},protoproject') + if simple_project.is_valid_project: + saved_project_data.append(f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}') return saved_project_data From 0b182b996547dd21c6fafbdd248a121d28b64e32 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 14:46:48 -0500 Subject: [PATCH 44/57] Rename ProjectBuild#id to #key --- utils/cacher.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index 0e92335..e04c559 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -9,7 +9,7 @@ class ProjectBuild: def __init__(self, project: dict): self.project = project self.display_name = project["display_name"] - self.id = project["id"] + self.key = project["key"] self.type = project["type"] def realProject(self) -> bool: @@ -33,7 +33,7 @@ def build_successful(self, followed_projects: List[dict]) -> bool: # we check the language status to confirm the build succeeded. 
for language in data['languages']: if language['status'] == "success": - self.id = data['id'] + self.key = data['id'] return True return ( @@ -92,7 +92,7 @@ def unfollow_projects(self, site: 'LGTMSite'): time.sleep(2) if project.realProject(): - self.unfollow_real_project(project.id) + self.unfollow_real_project(project.key) else: data = site.retrieve_project(project.display_name) @@ -118,19 +118,19 @@ def unfollow_real_project(id: int): try: time.sleep(2) - site.unfollow_repository_by_id(data['id']) + site.unfollow_repository_by_id(id) except LGTMRequestException as e: print(f"An unknown issue occurred unfollowing {project.display_name}") def return_successful_project_builds(self, site: 'LGTMSite') -> List[str]: - filtered_project_ids: List[str] = [] + filtered_project_keys: List[str] = [] followed_projects = site.get_my_projects() for project in self.projects: if project.build_successful(followed_projects): - filtered_project_ids.append(project.id) + filtered_project_keys.append(project.key) - return filtered_project_ids + return filtered_project_keys def build_processes_in_progress(self, followed_projects: List[dict]) -> bool: in_progress = False @@ -146,13 +146,13 @@ def create_cache_folder(): if not os.path.exists('cache'): os.makedirs('cache') -def write_project_data_to_file(project_ids: List[str], file_name: str): +def write_project_data_to_file(project_keys: List[str], file_name: str): create_cache_folder() file = open("cache/" + file_name + ".txt", "a") - for project_id in project_ids: - file.write(project_id + "\n") + for project_key in project_keys: + file.write(project_key + "\n") file.close() @@ -167,7 +167,7 @@ def get_project_builds(cached_file: str) -> ProjectBuilds: for i, project in enumerate(project_data): project_data[i] = ProjectBuild({ "display_name": project.split(",")[0], - "id": project.split(",")[1], + "key": project.split(",")[1], "type": project.split(",")[2], }) From c6db48703d0ad3e373f93ea59aa6f924add2404e Mon Sep 17 00:00:00 2001 
From: Thank You Date: Wed, 3 Mar 2021 14:48:38 -0500 Subject: [PATCH 45/57] Update comment on refactoring --- lgtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lgtm.py b/lgtm.py index 2e2210f..00bad87 100644 --- a/lgtm.py +++ b/lgtm.py @@ -359,8 +359,8 @@ def build_simple_project(project: dict) -> SimpleProject: is_protoproject = False else: # I don't know why this is here. Considering we have a new setup where - # we check the object to see if it's a valid project, could we not just - # set is_valid_project to False and let the code elsewhere catch that? + # we check the object to see if it's a valid project via is_valid_project, could + # we not just set is_valid_project to False and let the code elsewhere catch this problem? raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) return SimpleProject( From 9eb9c4a4afc2a357fbbac49c0fc9223013395f4f Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:09:02 -0500 Subject: [PATCH 46/57] Refactor SimpleProject to store the project type --- auto_sort_projects.py | 2 +- lgtm.py | 35 +++++++++++++++++++---------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/auto_sort_projects.py b/auto_sort_projects.py index 9c4a82f..929c579 100644 --- a/auto_sort_projects.py +++ b/auto_sort_projects.py @@ -71,7 +71,7 @@ project_list_name = gh_org_to_project_list_name[org] project_list_id = site.get_or_create_project_list(project_list_name) for project in org_to_projects[org]: - if project.is_protoproject: + if project.is_protoproject(): print('Unable to add project to project list since it is a protoproject. 
%s' % project) continue site.load_into_project_list(project_list_id, [project.key]) diff --git a/lgtm.py b/lgtm.py index 00bad87..8a66ef1 100644 --- a/lgtm.py +++ b/lgtm.py @@ -134,7 +134,7 @@ def force_rebuild_all_proto_projects(self): org_to_projects = LGTMDataFilters.org_to_ids(self.get_my_projects()) for org in org_to_projects: for project in org_to_projects[org]: - if not project.is_protoproject: + if not project.is_protoproject(): continue self.force_rebuild_project(project) @@ -186,7 +186,7 @@ def unfollow_proto_repository_by_id(self, project_id: str): self._make_lgtm_post(url, data) def unfollow_repository(self, simple_project: 'SimpleProject'): - url = "https://lgtm.com/internal_api/v0.2/unfollowProject" if not simple_project.is_protoproject \ + url = "https://lgtm.com/internal_api/v0.2/unfollowProject" if not simple_project.is_protoproject() \ else "https://lgtm.com/internal_api/v0.2/unfollowProtoproject" data = simple_project.make_post_data() self._make_lgtm_post(url, data) @@ -194,7 +194,7 @@ def unfollow_repository(self, simple_project: 'SimpleProject'): def unfollow_repository_by_org(self, org: str, include_protoproject: bool = False): projects_under_org = self.get_my_projects_under_org(org) for project in projects_under_org: - if not include_protoproject and project.is_protoproject: + if not include_protoproject and project.is_protoproject(): print("Not unfollowing project since it is a protoproject. %s" % project) continue print('Unfollowing project %s' % project.display_name) @@ -280,24 +280,24 @@ def create_from_file() -> 'LGTMSite': @dataclass +# TODO: this SimpleProject is no longer 'simple'. Some refactoring here could be nice. 
class SimpleProject: display_name: str key: str - is_protoproject: bool + project_type: str is_valid_project: bool org: str + state: str def make_post_data(self): - data_dict_key = 'protoproject_key' if self.is_protoproject else 'project_key' + data_dict_key = 'protoproject_key' if self.is_protoproject() else 'project_key' return { data_dict_key: self.key } - def project_type(self) -> str: - if is_protoproject: - return 'protoproject' - elif not is_protoproject: - return 'realProject' + def is_protoproject(self): + # The values for project_type should be hardcoded in one central location + return self.project_type == "protoproject" class LGTMDataFilters: @@ -331,12 +331,14 @@ def extract_project_under_org(org: str, projects_sorted: Dict[str, List[SimplePr return projects_sorted[org] @staticmethod + # TODO: This probably needs to be inside the SimpleProject class. def build_simple_project(project: dict) -> SimpleProject: org: str display_name: str key: str - is_protoproject: bool + project_type: str is_valid_project: bool = True + state: str if 'protoproject' in project: the_project = project['protoproject'] @@ -344,11 +346,11 @@ def build_simple_project(project: dict) -> SimpleProject: # Not really concerned with BitBucket right now is_valid_project = False display_name = the_project['displayName'] + state = the_project['state'] org = display_name.split('/')[0] key = the_project['key'] - is_protoproject = True + project_type = 'protoproject' elif 'realProject' in project: - the_project = project['realProject'][0] if the_project['repoProvider'] != 'github_apps': # Not really concerned with BitBucket right now @@ -356,7 +358,7 @@ def build_simple_project(project: dict) -> SimpleProject: org = str(the_project['slug']).split('/')[1] display_name = the_project['displayName'] key = the_project['key'] - is_protoproject = False + project_type = "realProject" else: # I don't know why this is here. 
Considering we have a new setup where # we check the object to see if it's a valid project via is_valid_project, could @@ -366,7 +368,8 @@ def build_simple_project(project: dict) -> SimpleProject: return SimpleProject( display_name=display_name, key=key, - is_protoproject=is_protoproject, + project_type=project_type, is_valid_project=is_valid_project, - org=org + org=org, + state=state ) From 0ba1576b00c8fdfec58b030c2b776832284f1b1b Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:09:32 -0500 Subject: [PATCH 47/57] Simplify logic in determining project state --- utils/cacher.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index e04c559..31a8617 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -57,18 +57,20 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: in_state = False for project in followed_projects: - if project.get('protoproject') is not None and project.get('protoproject')['displayName'] == self.display_name and project.get('protoproject')['state'] == state: - in_state = True - break + simple_project = LGTMDataFilters.build_simple_project(project) - # Real projects always have successful builds, or at least as far as I can tell. - if project.get('realProject') is not None and project.get('realProject')[0]['displayName'] == self.display_name: - if state == "build_attempt_in_progress" or state == "build_attempt_failed": - in_state == False - else: + if simple_project.display_name == self.display_name: + if simple_project.is_protoproject() and simple_project.state == state: in_state = True - - break + break + + # Real projects always have successful builds, or at least as far as I can tell. 
+ if not simple_project.is_protoproject(): + if state == "build_attempt_in_progress" or state == "build_attempt_failed": + in_state == False + else: + in_state = True + break return in_state From 476faeb2c0734a519a2ca83cc9b56c4dc0e33dcb Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:12:40 -0500 Subject: [PATCH 48/57] Add comments --- follow_repos_by_search_term.py | 6 ++++-- follow_top_repos_by_star_count.py | 3 ++- utils/cacher.py | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/follow_repos_by_search_term.py b/follow_repos_by_search_term.py index efbd401..30f6ca3 100644 --- a/follow_repos_by_search_term.py +++ b/follow_repos_by_search_term.py @@ -28,7 +28,8 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] for date_range in utils.github_dates.generate_dates(): repos = github.search_repositories(query=f'stars:>5 language:{language} fork:false created:{date_range} {search_term}') - # TODO: This occasionally returns requests.exceptions.ConnectionError which is annoying as hell. It would be nice if we built in exception handling. + # TODO: This occasionally returns requests.exceptions.ConnectionError which is annoying as hell. + # It would be nice if we built in exception handling. for repo in repos: # Github has rate limiting in place hence why we add a sleep here. 
More info can be found here: # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting @@ -42,7 +43,8 @@ def find_and_save_projects_to_lgtm(language: str, search_term: str) -> List[str] simple_project = LGTMDataFilters.build_simple_project(saved_project) if simple_project.is_valid_project: - saved_project_data.append(f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}') + saved_data = f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}' + saved_project_data.append(saved_data) return saved_project_data diff --git a/follow_top_repos_by_star_count.py b/follow_top_repos_by_star_count.py index 62d7f2c..8d996f9 100644 --- a/follow_top_repos_by_star_count.py +++ b/follow_top_repos_by_star_count.py @@ -40,7 +40,8 @@ def find_and_save_projects_to_lgtm(language: str) -> List[str]: simple_project = LGTMDataFilters.build_simple_project(saved_project) if simple_project.is_valid_project: - saved_project_data.append(f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}') + saved_data = f'{simple_project.display_name},{simple_project.key},{simple_project.project_type}' + saved_project_data.append(saved_data) return saved_project_data diff --git a/utils/cacher.py b/utils/cacher.py index 31a8617..b0262ef 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -167,6 +167,7 @@ def get_project_builds(cached_file: str) -> ProjectBuilds: project_data.remove("") for i, project in enumerate(project_data): + # Any way we can just make this a SimpleProject and not a ProjectBuild? 
project_data[i] = ProjectBuild({ "display_name": project.split(",")[0], "key": project.split(",")[1], From 2c0d44b747e514ee36461ecc9afedc1f8376cc35 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:15:49 -0500 Subject: [PATCH 49/57] Refactor logic with guard clauses --- utils/cacher.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index b0262ef..6aeb2c8 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -59,18 +59,17 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: for project in followed_projects: simple_project = LGTMDataFilters.build_simple_project(project) - if simple_project.display_name == self.display_name: - if simple_project.is_protoproject() and simple_project.state == state: - in_state = True - break - - # Real projects always have successful builds, or at least as far as I can tell. - if not simple_project.is_protoproject(): - if state == "build_attempt_in_progress" or state == "build_attempt_failed": - in_state == False - else: - in_state = True - break + if not simple_project.display_name == self.display_name: + continue + + if simple_project.is_protoproject() and simple_project.state == state: + in_state = True + break + + # Real projects always have successful builds, or at least as far as I can tell. 
+ if not simple_project.is_protoproject(): + in_state = !(state == "build_attempt_in_progress" or state == "build_attempt_failed") + break return in_state From 574d0f6519191bf9a0540df3eff1ff12869df32e Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:23:50 -0500 Subject: [PATCH 50/57] Add unfollow_all_followed_projects.py script --- README.md | 5 ++++- unfollow_all_followed_projects.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 unfollow_all_followed_projects.py diff --git a/README.md b/README.md index 1e756be..30a1a6b 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,9 @@ python3 follow_repos_by_search_term.py (optional) + +# Unfollows all projects you're currently following that are not in a custom list. +python3 unfollow_all_followed_projects.py ``` ## The Custom Projects Lists Feature @@ -104,7 +107,7 @@ LGTM can't move projects that are being processed into custom lists. To resolve > The can't be processed at this time because a project build is still in progress. -If you receive this error, wait a few hours and run the script again. +If you receive this error, wait a few hours and run the script again. 
## Legal diff --git a/unfollow_all_followed_projects.py b/unfollow_all_followed_projects.py new file mode 100644 index 0000000..20233f4 --- /dev/null +++ b/unfollow_all_followed_projects.py @@ -0,0 +1,10 @@ +from lgtm import LGTMSite, SimpleProject, LGTMDataFilters +import time + +site = LGTMSite.create_from_file() + +projects = site.get_my_projects() + +for project in projects: + simple_project = LGTMDataFilters.build_simple_project(project) + site.unfollow_repository(simple_project) From 803ebd759944f2a6682fc274a2028eee50c14c13 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:24:04 -0500 Subject: [PATCH 51/57] Convert ProjectBuild to a subclass of SimpleProject --- utils/cacher.py | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index 6aeb2c8..3c4e9dd 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -5,21 +5,9 @@ # This is very similar to SimpleProject. If I had discovered SimpleProject earlier # I would have built this code around that. -class ProjectBuild: - def __init__(self, project: dict): - self.project = project - self.display_name = project["display_name"] - self.key = project["key"] - self.type = project["type"] - - def realProject(self) -> bool: - return self.type == "realProject" - - def protoproject(self) -> bool: - return self.type == "protoproject" - +class ProjectBuild(SimpleProject): def build_successful(self, followed_projects: List[dict]) -> bool: - if self.protoproject: + if self.is_protoproject(): # A throttle that although may not be necessary a nice plus. 
time.sleep(2) site = LGTMSite.create_from_file() @@ -74,15 +62,15 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: return in_state def project_currently_followed(self, followed_projects: List[dict]) -> bool: - part_of_followed_projects = False + currently_followed = False for project in followed_projects: simple_project = LGTMDataFilters.build_simple_project(project) if (simple_project.display_name == self.display_name): - part_of_followed_projects = True + currently_followed = True break - return part_of_followed_projects + return currently_followed class ProjectBuilds: def __init__(self, projects: List[ProjectBuild]): @@ -92,9 +80,7 @@ def unfollow_projects(self, site: 'LGTMSite'): for project in self.projects: time.sleep(2) - if project.realProject(): - self.unfollow_real_project(project.key) - else: + if project.is_protoproject(): data = site.retrieve_project(project.display_name) # A failed protoproject build will always be intrepreted to LGTM @@ -103,6 +89,9 @@ def unfollow_projects(self, site: 'LGTMSite'): continue self.unfollow_proto_project(data['id']) + else: + self.unfollow_real_project(project.key) + def unfollow_proto_project(id: int): try: @@ -160,22 +149,24 @@ def write_project_data_to_file(project_keys: List[str], file_name: str): def get_project_builds(cached_file: str) -> ProjectBuilds: file = open(cached_file, "r") - project_data = file.read().split("\n") + cached_projects = file.read().split("\n") while("" in project_data): - project_data.remove("") + cached_projects.remove("") - for i, project in enumerate(project_data): - # Any way we can just make this a SimpleProject and not a ProjectBuild? 
- project_data[i] = ProjectBuild({ + for i, project in enumerate(cached_projects): + cached_projects[i] = ProjectBuild({ "display_name": project.split(",")[0], "key": project.split(",")[1], - "type": project.split(",")[2], + "project_type": project.split(",")[2], + "is_valid_project": True, + "org": "", + "state": "" }) file.close() - return ProjectBuilds(project_data) + return ProjectBuilds(cached_projects) def remove_file(file_name: str): os.remove(file_name) From 69b66148f818f443550927014a7e8face67d8287 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:28:51 -0500 Subject: [PATCH 52/57] Refactor simple project build to not raise error --- lgtm.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lgtm.py b/lgtm.py index 8a66ef1..fb04794 100644 --- a/lgtm.py +++ b/lgtm.py @@ -331,7 +331,6 @@ def extract_project_under_org(org: str, projects_sorted: Dict[str, List[SimplePr return projects_sorted[org] @staticmethod - # TODO: This probably needs to be inside the SimpleProject class. def build_simple_project(project: dict) -> SimpleProject: org: str display_name: str @@ -360,10 +359,9 @@ def build_simple_project(project: dict) -> SimpleProject: key = the_project['key'] project_type = "realProject" else: - # I don't know why this is here. Considering we have a new setup where - # we check the object to see if it's a valid project via is_valid_project, could - # we not just set is_valid_project to False and let the code elsewhere catch this problem? - raise KeyError('\'realProject\' nor \'protoproject\' in %s' % str(project)) + # We raise this in cases where we can't intrepret the data we get + # back from LGTM. 
+ is_valid_project = False return SimpleProject( display_name=display_name, From 89a25d7f2e4c7580089498b6976e3177657b5983 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:29:21 -0500 Subject: [PATCH 53/57] Add checks confirming LGTM project is valid --- unfollow_all_followed_projects.py | 3 ++- utils/cacher.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/unfollow_all_followed_projects.py b/unfollow_all_followed_projects.py index 20233f4..dd565d2 100644 --- a/unfollow_all_followed_projects.py +++ b/unfollow_all_followed_projects.py @@ -7,4 +7,5 @@ for project in projects: simple_project = LGTMDataFilters.build_simple_project(project) - site.unfollow_repository(simple_project) + if simple_project.is_valid_project: + site.unfollow_repository(simple_project) diff --git a/utils/cacher.py b/utils/cacher.py index 3c4e9dd..589bb59 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -47,6 +47,9 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: for project in followed_projects: simple_project = LGTMDataFilters.build_simple_project(project) + if not simple_project.is_valid_project: + continue + if not simple_project.display_name == self.display_name: continue @@ -66,7 +69,10 @@ def project_currently_followed(self, followed_projects: List[dict]) -> bool: for project in followed_projects: simple_project = LGTMDataFilters.build_simple_project(project) - if (simple_project.display_name == self.display_name): + if not simple_project.is_valid_project: + continue + + if simple_project.display_name == self.display_name: currently_followed = True break From f44b9593baf287fd573d37dbc2f7d95ed6ce0fe5 Mon Sep 17 00:00:00 2001 From: Thank You Date: Wed, 3 Mar 2021 15:58:54 -0500 Subject: [PATCH 54/57] Fix misc errors --- lgtm.py | 2 +- utils/cacher.py | 42 ++++++++++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/lgtm.py b/lgtm.py index fb04794..6263a80 100644 --- 
a/lgtm.py +++ b/lgtm.py @@ -337,7 +337,7 @@ def build_simple_project(project: dict) -> SimpleProject: key: str project_type: str is_valid_project: bool = True - state: str + state: str = "" if 'protoproject' in project: the_project = project['protoproject'] diff --git a/utils/cacher.py b/utils/cacher.py index 589bb59..d98b0e3 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -1,7 +1,7 @@ from typing import List import os import time -from lgtm import LGTMSite, LGTMRequestException, LGTMDataFilters +from lgtm import LGTMSite, LGTMRequestException, LGTMDataFilters, SimpleProject # This is very similar to SimpleProject. If I had discovered SimpleProject earlier # I would have built this code around that. @@ -59,7 +59,7 @@ def project_state(self, state: str, followed_projects: List[dict]) -> bool: # Real projects always have successful builds, or at least as far as I can tell. if not simple_project.is_protoproject(): - in_state = !(state == "build_attempt_in_progress" or state == "build_attempt_failed") + in_state = not (state == "build_attempt_in_progress" or state == "build_attempt_failed") break return in_state @@ -87,6 +87,9 @@ def unfollow_projects(self, site: 'LGTMSite'): time.sleep(2) if project.is_protoproject(): + # Protoprojects are gnarly because I believe LGTM updates the key + # if the protoproject succeeds. In case it does, we retrieve the + # latest id from LGTM then unfollow it. 
data = site.retrieve_project(project.display_name) # A failed protoproject build will always be intrepreted to LGTM @@ -94,12 +97,12 @@ def unfollow_projects(self, site: 'LGTMSite'): if 'code' in data and data['code'] == 404: continue - self.unfollow_proto_project(data['id']) + self.unfollow_proto_project(site, data['id']) else: - self.unfollow_real_project(project.key) + self.unfollow_real_project(site, project.key) - def unfollow_proto_project(id: int): + def unfollow_proto_project(self, site: 'LGTMSite', id: int): try: time.sleep(2) @@ -108,9 +111,9 @@ def unfollow_proto_project(id: int): # In some cases even though we've recorded the project as a protoproject # it's actually a realproject. So we can't unfollow it via a proto-project # unfollow API call. We can however unfollow it via the real project API call. - self.unfollow_real_project(id) + self.unfollow_real_project(site, id) - def unfollow_real_project(id: int): + def unfollow_real_project(self, site: 'LGTMSite', id: int): try: time.sleep(2) @@ -157,18 +160,25 @@ def get_project_builds(cached_file: str) -> ProjectBuilds: cached_projects = file.read().split("\n") - while("" in project_data): + while("" in cached_projects): cached_projects.remove("") for i, project in enumerate(cached_projects): - cached_projects[i] = ProjectBuild({ - "display_name": project.split(",")[0], - "key": project.split(",")[1], - "project_type": project.split(",")[2], - "is_valid_project": True, - "org": "", - "state": "" - }) + cached_projects[i] = ProjectBuild( + display_name=project.split(",")[0], + key=project.split(",")[1], + project_type=project.split(",")[2], + is_valid_project=True, + org="", + state="" + ) + + # display_name: str + # key: str + # project_type: str + # is_valid_project: bool + # org: str + # state: str file.close() From 9ba28cf51932fb87f54cbd427e3d1df1766d3b84 Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 4 Mar 2021 10:04:49 -0500 Subject: [PATCH 55/57] Reword comment --- utils/cacher.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cacher.py b/utils/cacher.py index d98b0e3..c9260f2 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -92,7 +92,7 @@ def unfollow_projects(self, site: 'LGTMSite'): # latest id from LGTM then unfollow it. data = site.retrieve_project(project.display_name) - # A failed protoproject build will always be intrepreted to LGTM + # A failed protoproject build will be intrepreted to LGTM # as a project that can't be found. if 'code' in data and data['code'] == 404: continue From 2e1d5950ac2a772634116e8f7ee55f16256416a5 Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 4 Mar 2021 10:07:19 -0500 Subject: [PATCH 56/57] Remove unnecessary code --- unfollow_all_followed_projects.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unfollow_all_followed_projects.py b/unfollow_all_followed_projects.py index dd565d2..15a1bb7 100644 --- a/unfollow_all_followed_projects.py +++ b/unfollow_all_followed_projects.py @@ -1,5 +1,4 @@ -from lgtm import LGTMSite, SimpleProject, LGTMDataFilters -import time +from lgtm import LGTMSite, LGTMDataFilters site = LGTMSite.create_from_file() From 5ea2b9a966e965b8f1eeb8e15397e7ea5ad788e9 Mon Sep 17 00:00:00 2001 From: Thank You Date: Thu, 4 Mar 2021 10:23:28 -0500 Subject: [PATCH 57/57] Remove comments --- utils/cacher.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/utils/cacher.py b/utils/cacher.py index c9260f2..a2334bd 100644 --- a/utils/cacher.py +++ b/utils/cacher.py @@ -173,13 +173,6 @@ def get_project_builds(cached_file: str) -> ProjectBuilds: state="" ) - # display_name: str - # key: str - # project_type: str - # is_valid_project: bool - # org: str - # state: str - file.close() return ProjectBuilds(cached_projects)