diff --git a/other/scraper/scrape.py b/other/scraper/scrape.py index 07b96a03..3e90121a 100644 --- a/other/scraper/scrape.py +++ b/other/scraper/scrape.py @@ -1,7 +1,10 @@ +#!/usr/bin/env python + import sys import json import time import urllib.request +import urllib.parse import re HOST = "http://api.steampowered.com" @@ -11,16 +14,24 @@ DELAY = 0.1 # How long to delay between requests FILENAME = "addons.txt" -ignore_words = ["content", "server"] +# Not a whole word search, so nav also gets navmesh +ignore_words = [ + "content", + "server", + "nav", + "node", + "icon" +] + ignore_reg = "(? resobj["response"]["total"]: + if page * NUMPERPAGE > response["total"]: break - else: + else: # so valve doesn't get angry at us time.sleep(DELAY) - + # Results come back sorted, but reverse it so # newer entries are added at the end instead of shifting everything at the beginning workshopids.reverse() @@ -78,4 +95,3 @@ def containsIgnoreWords(str): print("Finished!!") f.close() -