Skip to content

Commit

Permalink
Cherrypick ca95ebc From #179 | branch scraper-strengthen
Browse files Browse the repository at this point in the history
  • Loading branch information
Foohy committed May 13, 2024
1 parent 9783ad1 commit f454281
Showing 1 changed file with 18 additions and 7 deletions.
25 changes: 18 additions & 7 deletions other/scraper/scrape.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python

import sys
import json
import time
Expand All @@ -11,16 +13,24 @@
DELAY = 0.1 # How long to delay between requests
FILENAME = "addons.txt"

ignore_words = ["content", "server"]
# Substring match, not whole-word: "nav" also catches "navmesh" (unless an underscore touches the word)
ignore_words = [
"content",
"server",
"nav",
"node",
"icon"
]

# Pattern template for a single ignore word. The lookbehind/lookahead reject a
# match that is directly preceded or followed by an underscore, which allows
# ignore words to be part of a map name (e.g. "gm_content_map").
ignore_reg = r"(?<!_){0}(?!_)"


def containsIgnoreWord(str, word):
    """Return True if `word` occurs in `str` with no underscore touching it.

    The search is a case-insensitive substring search (not whole-word), so
    "nav" also matches inside "Navmesh". An occurrence that has an underscore
    immediately before or after it is not counted.

    Args:
        str: Text to search (the parameter name shadows the builtin; it is
            kept unchanged for backward compatibility with existing callers).
        word: Literal word to look for. It is escaped before being placed in
            the pattern, so regex metacharacters are matched literally.

    Returns:
        bool: True when at least one qualifying occurrence exists.
    """
    # Escape so a word such as "c++" cannot break or alter the pattern.
    pattern = ignore_reg.format(re.escape(word))
    return re.search(pattern, str, flags=re.IGNORECASE) is not None

def containsIgnoreWords(str):
    """Report whether `str` contains any entry of the `ignore_words` list.

    Each entry is checked with `containsIgnoreWord`; evaluation stops at the
    first entry that matches.

    Args:
        str: Text to check (name kept as-is for caller compatibility).

    Returns:
        bool: True if at least one ignore word matches, otherwise False.
    """
    return any(containsIgnoreWord(str, candidate) for candidate in ignore_words)

if __name__ == "__main__":
Expand All @@ -45,7 +55,9 @@ def containsIgnoreWords(str):
total = resobj["response"]["total"]

for addon in resobj["response"]["publishedfiledetails"]:
if "title" in addon and containsIgnoreWords(addon["title"]):
hasignorewords = "title" in addon and containsIgnoreWords(addon["title"])
sexyfuntimes = "maybe_inappropriate_sex" in addon and addon["maybe_inappropriate_sex"] == True
if hasignorewords or sexyfuntimes:
ign_str = u"Ignoring: " + addon["title"]
print(ign_str.encode('utf-8'))
continue
Expand All @@ -64,10 +76,10 @@ def containsIgnoreWords(str):

if page * NUMPERPAGE > resobj["response"]["total"]:
break
else:
else:
# so valve doesn't get angry at us
time.sleep(DELAY)

# Results come back sorted, but reverse it so
# newer entries are added at the end instead of shifting everything at the beginning
workshopids.reverse()
Expand All @@ -78,4 +90,3 @@ def containsIgnoreWords(str):

print("Finished!!")
f.close()

0 comments on commit f454281

Please sign in to comment.