Skip to content

Commit

Permalink
Handle domain parsing correctly #10
Browse files Browse the repository at this point in the history
  • Loading branch information
ndfred committed Apr 27, 2019
1 parent cbde973 commit 3d7fb1f
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
28 changes: 21 additions & 7 deletions build_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
# Default paths of the two files that output_hosts writes.
OUTPUT_HOSTS_PATH = 'hosts.txt'
OUTPUT_DOMAINS_PATH = 'domains.txt'
# Address written in front of every name in the output files.
BLACKHOLE_IP = '0.0.0.0'
# URL of the Public Suffix List that get_domain_extensions() hands to
# parse_host_file.
DOMAIN_EXTENSIONS_URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
# Cache for get_domain_extensions(); stays None until the first call fills it.
DOMAIN_EXTENSIONS = None

def get_domain_extensions():
    """Return the frozenset of known domain suffixes, building it on first use.

    The set is made of whatever parse_host_file yields for
    DOMAIN_EXTENSIONS_URL and is cached in the module-level
    DOMAIN_EXTENSIONS, so later calls return the same object without
    parsing again.

    If parse_host_file raises (it does so when it finds no entries), the
    cache is left as None and the next call tries again.
    """
    global DOMAIN_EXTENSIONS

    # None is a singleton: test identity rather than equality.
    if DOMAIN_EXTENSIONS is None:
        DOMAIN_EXTENSIONS = frozenset(parse_host_file(DOMAIN_EXTENSIONS_URL))

    return DOMAIN_EXTENSIONS

def download_file(url):
request = urllib2.Request(url)
Expand All @@ -33,12 +43,16 @@ def cleanup_domain_line(line):
return line

def is_domain(domain):
    """Tell whether `domain` is a bare domain rather than a host below one.

    A name is treated as a domain when it contains no dot at all, or when
    everything after its first dot is an entry of get_domain_extensions().
    Comparing the whole remainder against the suffix set lets multi-label
    suffixes be recognised, which counting dots cannot do.

    Returns True for a domain, False otherwise.
    """
    if '.' not in domain:
        return True

    # Drop the leftmost label; what is left must be a known suffix.
    extension = domain.partition('.')[2]

    return extension in get_domain_extensions()

def parse_domain_line(line):
original_line = line
line = cleanup_domain_line(line)
Expand Down Expand Up @@ -71,7 +85,7 @@ def parse_host_file(url):
yield domain

if not found_domains:
raise Exception('Couldn\'t find any domains in that URL')
raise Exception('Couldn\'t find any domains in URL %s' % url)

def output_hosts(ads_lists_ulrs=FIREBOG_CONSERVATIVE_URLS_LIST, output_hosts_path=OUTPUT_HOSTS_PATH, output_domains_path=OUTPUT_DOMAINS_PATH, blackhole_ip=BLACKHOLE_IP):
ads_lists = download_ads_list_urls(ads_lists_ulrs)
Expand All @@ -87,9 +101,9 @@ def output_hosts(ads_lists_ulrs=FIREBOG_CONSERVATIVE_URLS_LIST, output_hosts_pat
with open(output_domains_path, 'w') as domains_file:
for domain in domains:
if is_domain(domain):
hosts_file.write('%s %s\n' % (blackhole_ip, domain))
else:
domains_file.write('%s %s\n' % (blackhole_ip, domain))
else:
hosts_file.write('%s %s\n' % (blackhole_ip, domain))

print 'Wrote %d host names in %s and %s' % (len(domains), output_hosts_path, output_domains_path)

Expand Down
5 changes: 5 additions & 0 deletions test_build_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ def test_Hostsfile(self):
self.assertEqual(parse_domain_line('127.0.0.1 005.free-counter.co.uk'), '005.free-counter.co.uk')
self.assertEqual(parse_domain_line('127.0.0.1 118d654612df63bc8395-aecfeaabe29a34ea9a877711ec6d8aed.r37.cf2.rackcdn.com'), '118d654612df63bc8395-aecfeaabe29a34ea9a877711ec6d8aed.r37.cf2.rackcdn.com')

def test_get_domain_extensions(self):
    """The loaded suffix set holds single- and multi-label suffixes."""
    domain_extensions = get_domain_extensions()

    # assertIn names the missing suffix on failure, unlike assertTrue.
    self.assertIn('com', domain_extensions)
    self.assertIn('co.uk', domain_extensions)

def test_is_domain(self):
self.assertTrue(is_domain('google.com'))
self.assertFalse(is_domain('www.google.com'))
Expand Down

0 comments on commit 3d7fb1f

Please sign in to comment.