for python3 #3

Open · wants to merge 1 commit into master
90 changes: 45 additions & 45 deletions gfwlist2pac/main.py
@@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-

 import pkgutil
-import urlparse
+import base64
+import urllib.parse
 import json
 import logging
 from argparse import ArgumentParser
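
Review note: Python 3 moved Python 2's `urlparse` module into `urllib.parse` and removed the str-based `'base64'` codec, which is why `base64` is now imported explicitly. A minimal sketch of the two replacements this hunk prepares for (the URL and payload below are illustrative):

```python
import base64
import urllib.parse

# Python 2: urlparse.urlparse(url)  ->  Python 3: urllib.parse.urlparse(url)
parts = urllib.parse.urlparse('http://example.com/path')
print(parts.hostname)                   # example.com

# Python 2: data.decode('base64')   ->  Python 3: base64.decodebytes(data)
print(base64.decodebytes(b'aGVsbG8='))  # b'hello'
```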
@@ -13,82 +14,82 @@
 def parse_args():
     parser = ArgumentParser()
     parser.add_argument('-i', '--input', dest='input', required=True,
-                        help='path to gfwlist', metavar='GFWLIST')
+                        help='path to gfwlist', metavar='GFWLIST')
     parser.add_argument('-f', '--file', dest='output', required=True,
-                        help='path to output pac', metavar='PAC')
+                        help='path to output pac', metavar='PAC')

     parser.add_argument('-p', '--proxy', dest='proxy', required=True,
-                        help='the proxy parameter in the pac file, for example,\
-                        "SOCKS5 127.0.0.1:1080;"', metavar='PROXY')
+                        help='the proxy parameter in the pac file, for example, \
+                        SOCKS5 127.0.0.1:1080;', metavar='PROXY')
     return parser.parse_args()


 def decode_gfwlist(content):
     # decode base64 if we have to
     try:
-        return content.decode('base64')
-    except:
+        return base64.decodebytes(content)
+    except Exception as e:
+        logging.error(e)
         return content
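
Review note: `base64.decodebytes` expects bytes and raises `binascii.Error` (a `ValueError` subclass) on malformed input, so the broad `except Exception` preserves the old fall-through of returning the raw content when the list is not base64-encoded. A sketch of the same idea with a narrower catch, assuming only decode failures should be swallowed (the function name is hypothetical):

```python
import base64
import binascii

def decode_maybe_base64(content: bytes) -> bytes:
    """Return base64-decoded bytes, or the input unchanged on failure."""
    try:
        return base64.decodebytes(content)
    except binascii.Error:
        # Not valid base64 (e.g. a plain-text rule file): keep the raw bytes.
        return content

print(decode_maybe_base64(b'aGVsbG8='))  # b'hello'
```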


-def get_hostname(something):
+def get_hostname(element):
     try:
         # quite enough for GFW
-        if not something.startswith('http:'):
-            something = 'http://' + something
-        r = urlparse.urlparse(something)
+        if not element.startswith(b'http:'):
+            element = b'http://' + element
+        r = urllib.parse.urlparse(element)
         return r.hostname
     except Exception as e:
-        logging.error(e)
+        logging.error(e)
         return None
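
Review note: in Python 3, `urllib.parse.urlparse` accepts bytes as well as str, but bytes in means bytes out, which is why the literals become `b'http:'` here and why the hostname is decoded back to str in `add_domain_to_set` below. A small sketch (the rule fragment is illustrative):

```python
import urllib.parse

rule = b'example.com/path'             # illustrative gfwlist fragment
if not rule.startswith(b'http:'):
    rule = b'http://' + rule           # give urlparse a scheme to parse against
parts = urllib.parse.urlparse(rule)
print(parts.hostname)                  # b'example.com' (bytes in, bytes out)
print(parts.hostname.decode('utf-8'))  # example.com
```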


-def add_domain_to_set(s, something):
-    hostname = get_hostname(something)
+def add_domain_to_set(s, element):
+    hostname = get_hostname(element)
     if hostname is not None:
-        if hostname.startswith('.'):
-            hostname = hostname.lstrip('.')
-        if hostname.endswith('/'):
-            hostname = hostname.rstrip('/')
+        if hostname.startswith(b'.'):
+            hostname = hostname.lstrip(b'.')
+        if hostname.endswith(b'/'):
+            hostname = hostname.rstrip(b'/')
         if hostname:
-            s.add(hostname)
+            s.add(hostname.decode('utf-8'))
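
Review note: because `get_hostname` now returns bytes, the cleanup strips on bytes and decodes only at the last moment, so the domain set stays str-valued. A condensed sketch of the normalization (the helper name is hypothetical):

```python
def normalize_hostname(hostname: bytes):
    """Strip leading dots and trailing slashes; return str, or None if empty."""
    hostname = hostname.lstrip(b'.').rstrip(b'/')
    return hostname.decode('utf-8') if hostname else None

print(normalize_hostname(b'.example.com/'))  # example.com
print(normalize_hostname(b'...///'))         # None
```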


 def parse_gfwlist(content):
-    builtin_rules = pkgutil.get_data('gfwlist2pac', 'resources/builtin.txt').splitlines(False)
+    builtin_rules = pkgutil.get_data(
+        'gfwlist2pac', 'resources/builtin.txt').splitlines(False)
     gfwlist = content.splitlines(False)
-    domains = set(builtin_rules)
+    domains = set([domain.decode('utf-8') for domain in builtin_rules])
     for line in gfwlist:
-        if line.find('.*') >= 0:
+        if line.find(b'.*') >= 0:
             continue
-        elif line.find('*') >= 0:
-            line = line.replace('*', '/')
-        if line.startswith('!'):
+        elif line.find(b'*') >= 0:
+            line = line.replace(b'*', b'/')
+        if line.startswith(b'!'):
             continue
-        elif line.startswith('['):
+        elif line.startswith(b'['):
             continue
-        elif line.startswith('@'):
-            # ignore white list
+        elif line.startswith(b'@'):
             continue
-        elif line.startswith('||'):
-            add_domain_to_set(domains, line.lstrip('||'))
-        elif line.startswith('|'):
-            add_domain_to_set(domains, line.lstrip('|'))
-        elif line.startswith('.'):
-            add_domain_to_set(domains, line.lstrip('.'))
+        elif line.startswith(b'||'):
+            add_domain_to_set(domains, line.lstrip(b'||'))
+        elif line.startswith(b'|'):
+            add_domain_to_set(domains, line.lstrip(b'|'))
+        elif line.startswith(b'.'):
+            add_domain_to_set(domains, line.lstrip(b'.'))
         else:
             add_domain_to_set(domains, line)
     # TODO: reduce ['www.google.com', 'google.com'] to ['google.com']
     return domains
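
Review note: `lstrip` takes a set of characters rather than a literal prefix, so `line.lstrip(b'||')` and `line.lstrip(b'|')` do the same thing (strip every leading `|`). That happens to be the desired behavior for these rules; on Python 3.9+, `bytes.removeprefix` states the intent more directly, though it is unavailable if the 3.4/3.5 support declared in setup.py is kept:

```python
line = b'||blocked.example'
print(line.lstrip(b'||'))        # b'blocked.example' -- argument is a char set
print(line.lstrip(b'|'))         # identical result
print(line.removeprefix(b'||'))  # b'blocked.example' (Python 3.9+ only)
```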


 def generate_pac(domains, proxy):
     # render the pac file
     proxy_content = pkgutil.get_data('gfwlist2pac', 'resources/proxy.pac')
-    domains_dict = {}
-    for domain in domains:
-        domains_dict[domain] = 1
-    proxy_content = proxy_content.replace('__PROXY__', json.dumps(str(proxy)))
-    proxy_content = proxy_content.replace('__DOMAINS__', json.dumps(domains_dict, indent=2))
+    # domains_dict = {}
+    # for domain in domains:
+    #     domains_dict[domain.decode('utf-8')] = 1
+    proxy_content = proxy_content.replace(
+        b'__PROXY__', bytes(proxy, encoding='utf-8'))
+    proxy_content = proxy_content.replace(
+        b'__DOMAINS__', json.dumps(list(domains), indent=4).encode('utf-8'))
     return proxy_content
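
Review note: two behavioral changes in this hunk deserve a flag. First, `__DOMAINS__` changes from a JSON object mapping each domain to 1 into a JSON array, so any `hasOwnProperty`-style lookup in `proxy.pac` would need to become an array-membership test (the lookup code is not visible in this diff). Second, the old code JSON-encoded the proxy string, which added surrounding quotes; the template shown further down (`var proxy = __PROXY__;`) does not quote the placeholder itself, so inserting raw bytes yields `var proxy = SOCKS5 127.0.0.1:1080;;`, which is not valid JavaScript. A sketch of a variant that keeps the quoting (the template string is illustrative):

```python
import json

proxy = 'SOCKS5 127.0.0.1:1080;'
domains = {'example.com', 'example.org'}
template = b"var proxy = __PROXY__;\nvar domains = __DOMAINS__;\n"

content = template.replace(
    b'__PROXY__', json.dumps(proxy).encode('utf-8'))  # keeps the quotes
content = content.replace(
    b'__DOMAINS__', json.dumps(sorted(domains), indent=4).encode('utf-8'))
print(content.decode('utf-8'))
```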


@@ -101,8 +102,7 @@ def main():
     pac_content = generate_pac(domains, args.proxy)
     with open(args.output, 'wb') as f:
         f.write(pac_content)
-

 if __name__ == '__main__':
     main()
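
Review note: for a quick end-to-end check of the converted module, `main()` can be driven through `sys.argv`; the file names here are illustrative, and the import path is taken from the diff's `gfwlist2pac/main.py`:

```python
import sys
from gfwlist2pac.main import main

# Hypothetical invocation, equivalent to:
#   gfwlist2pac -i gfwlist.txt -f proxy.pac -p 'SOCKS5 127.0.0.1:1080;'
sys.argv = ['gfwlist2pac', '-i', 'gfwlist.txt',
            '-f', 'proxy.pac', '-p', 'SOCKS5 127.0.0.1:1080;']
main()
```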

4 changes: 2 additions & 2 deletions gfwlist2pac/resources/proxy.pac
@@ -1,12 +1,12 @@
 // Generated by gfwlist2pac
 // https://github.com/clowwindy/gfwlist2pac

-var domains = __DOMAINS__;
-
 var proxy = __PROXY__;

 var direct = 'DIRECT;';

+var domains = __DOMAINS__;
+
 function FindProxyForURL(url, host) {
     var lastPos = 0;
     var domain = host;
6 changes: 3 additions & 3 deletions setup.py
@@ -19,8 +19,8 @@
""",
classifiers=[
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
],
)