From f89e178ad386ea7a9d5d3c408b9ce7c9857ab2f9 Mon Sep 17 00:00:00 2001
From: Mika Eloranta <mel@ohmu.fi>
Date: Tue, 19 Apr 2016 19:03:35 +0300
Subject: [PATCH] add python3 support

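This drops support for Python 2.5 and older: the print() function
(via `from __future__ import print_function`) and the
`except ... as e` syntax both require Python 2.6 or newer. In return,
the scripts run on all Python 3.x versions.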
---
 data_hacks/bar_chart.py           | 21 ++++++++++-----------
 data_hacks/histogram.py           | 17 +++++++++--------
 data_hacks/ninety_five_percent.py | 13 +++++++------
 data_hacks/run_for.py             | 13 +++++++------
 data_hacks/sample.py              | 13 +++++++------
 5 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py
index c68af8b..f43ae60 100755
--- a/data_hacks/bar_chart.py
+++ b/data_hacks/bar_chart.py
@@ -54,20 +54,20 @@ def run(input_stream, options):
         else:
             data[row] += 1
             total += 1
-    
+
     if not data:
-        print "Error: no data"
+        print("Error: no data")
         sys.exit(1)
-    
+
     max_length = max([len(key) for key in data.keys()])
     max_length = min(max_length, 50)
     value_characters = 80 - max_length
     max_value = max(data.values())
     scale = int(math.ceil(float(max_value) / value_characters))
     scale = max(1, scale)
-    
-    print "# each " + options.dot + " represents a count of %d. total %d" % (scale, total)
-    
+
+    print("# each " + options.dot + " represents a count of %d. total %d" % (scale, total))
+
     if options.sort_values:
         data = [[value, key] for key, value in data.items()]
         data.sort(key=lambda x: x[0], reverse=options.reverse_sort)
@@ -79,13 +79,13 @@ def run(input_stream, options):
             data.sort(key=lambda x: (Decimal(x[1])), reverse=options.reverse_sort)
         else:
             data.sort(key=lambda x: x[1], reverse=options.reverse_sort)
-    
+
     str_format = "%" + str(max_length) + "s [%6d] %s%s"
     percentage = ""
     for value, key in data:
         if options.percentage:
             percentage = " (%0.2f%%)" % (100 * Decimal(value) / Decimal(total))
-        print str_format % (key[:max_length], value, (value / scale) * options.dot, percentage)
+        print(str_format % (key[:max_length], value, (value // scale) * options.dot, percentage))
 
 if __name__ == "__main__":
     parser = OptionParser()
@@ -107,10 +107,9 @@ def run(input_stream, options):
     parser.add_option("--dot", dest="dot", default='∎', help="Dot representation")
 
     (options, args) = parser.parse_args()
-    
+
     if sys.stdin.isatty():
         parser.print_usage()
-        print "for more help use --help"
+        print("for more help use --help")
         sys.exit(1)
     run(load_stream(sys.stdin), options)
-
diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 3d16cc8..57895fa 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -24,6 +24,7 @@
 https://github.com/bitly/data_hacks
 """
 
+from __future__ import print_function
 import sys
 from decimal import Decimal
 import logging
@@ -97,7 +98,7 @@ def load_stream(input_stream, agg_value_key, agg_key_value):
                 yield DataPoint(Decimal(clean_line), 1)
         except:
             logging.exception('failed %r', line)
-            print >>sys.stderr, "invalid line %r" % line
+            print("invalid line %r" % line, file=sys.stderr)
 
 
 def median(values, key=None):
@@ -105,9 +106,9 @@ def median(values, key=None):
         key = None  # map and sort accept None as identity
     length = len(values)
     if length % 2:
-        median_indeces = [length/2]
+        median_indeces = [length // 2]
     else:
-        median_indeces = [length/2-1, length/2]
+        median_indeces = [length // 2 - 1, length // 2]
 
     values = sorted(values, key=key)
     return sum(map(key,
@@ -241,7 +242,7 @@ def log_steps(k, n):
         print("# Mean = %f; Variance = %f; SD = %f; Median %f" %
               (mvsd.mean(), mvsd.var(), mvsd.sd(),
                median(accepted_data, key=lambda x: x.value)))
-    print "# each " + options.dot + " represents a count of %d" % bucket_scale
+    print("# each " + options.dot + " represents a count of %d" % bucket_scale)
     bucket_min = min_v
     bucket_max = min_v
     percentage = ""
@@ -252,12 +253,12 @@ def log_steps(k, n):
         bucket_count = bucket_counts[bucket]
         star_count = 0
         if bucket_count:
-            star_count = bucket_count / bucket_scale
+            star_count = bucket_count // bucket_scale
         if options.percentage:
             percentage = " (%0.2f%%)" % (100 * Decimal(bucket_count) /
                                          Decimal(samples))
-        print format_string % (bucket_min, bucket_max, bucket_count, options.dot *
-                               star_count, percentage)
+        print(format_string % (bucket_min, bucket_max, bucket_count, options.dot *
+                               star_count, percentage))
 
 
 if __name__ == "__main__":
@@ -294,7 +295,7 @@ def log_steps(k, n):
     if sys.stdin.isatty():
         # if isatty() that means it's run without anything piped into it
         parser.print_usage()
-        print "for more help use --help"
+        print("for more help use --help")
         sys.exit(1)
     histogram(load_stream(sys.stdin, options.agg_value_key,
                           options.agg_key_value), options)
diff --git a/data_hacks/ninety_five_percent.py b/data_hacks/ninety_five_percent.py
index 9a51432..0917d96 100755
--- a/data_hacks/ninety_five_percent.py
+++ b/data_hacks/ninety_five_percent.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# 
+#
 # Copyright 2010 Bitly
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -20,6 +20,7 @@
 https://github.com/bitly/data_hacks
 """
 
+from __future__ import print_function
 import sys
 import os
 from decimal import Decimal
@@ -35,16 +36,16 @@ def run():
         try:
             t = Decimal(line)
         except:
-            print >>sys.stderr, "invalid line %r" % line
+            print("invalid line %r" % line, file=sys.stderr)
         count +=1
         data[t] = data.get(t, 0) + 1
-    print calc_95(data, count)
-        
+    print(calc_95(data, count))
+
 def calc_95(data, count):
     # find the time it took for x entry, where x is the threshold
     threshold = Decimal(count) * Decimal('.95')
     start = Decimal(0)
-    times = data.keys()
+    times = list(data.keys())
     times.sort()
     for t in times:
         # increment our count by the # of items in this time bucket
@@ -54,6 +55,6 @@ def calc_95(data, count):
 
 if __name__ == "__main__":
     if sys.stdin.isatty() or '--help' in sys.argv or '-h' in sys.argv:
-        print "Usage: cat data | %s" % os.path.basename(sys.argv[0])
+        print("Usage: cat data | %s" % os.path.basename(sys.argv[0]))
         sys.exit(1)
     run()
diff --git a/data_hacks/run_for.py b/data_hacks/run_for.py
index a8ea21f..264b777 100755
--- a/data_hacks/run_for.py
+++ b/data_hacks/run_for.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# 
+#
 # Copyright 2010 Bitly
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -20,6 +20,7 @@
 https://github.com/bitly/data_hacks
 """
 
+from __future__ import print_function
 import time
 import sys
 import os
@@ -38,7 +39,7 @@ def getruntime(arg):
     elif suffix == "d":
         return base * 60 * 60 * 24
     else:
-        print >>sys.stderr, "invalid time suffix %r. must be one of s,m,h,d" % arg
+        print("invalid time suffix %r. must be one of s,m,h,d" % arg, file=sys.stderr)
 
 def run(runtime):
     end = time.time() + runtime
@@ -49,14 +50,14 @@ def run(runtime):
 
 if __name__ == "__main__":
     usage = "Usage: tail -f access.log | %s [time] | ..." % os.path.basename(sys.argv[0])
-    help = "time can be in the format 10s, 10m, 10h, etc"
+    help_str = "time can be in the format 10s, 10m, 10h, etc"
     if sys.stdin.isatty():
-        print usage
-        print help
+        print(usage)
+        print(help_str)
         sys.exit(1)
 
     runtime = getruntime(sys.argv[-1])
     if not runtime:
-        print usage
+        print(usage)
         sys.exit(1)
     run(runtime)
diff --git a/data_hacks/sample.py b/data_hacks/sample.py
index c3296ab..937479a 100755
--- a/data_hacks/sample.py
+++ b/data_hacks/sample.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# 
+#
 # Copyright 2010 Bitly
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -20,6 +20,7 @@
 https://github.com/bitly/data_hacks
 """
 
+from __future__ import print_function
 import sys
 import random
 from optparse import OptionParser
@@ -49,17 +50,17 @@ def get_sample_rate(rate_string):
     parser = OptionParser(usage="cat data | %prog [options] [sample_rate]")
     parser.add_option("--verbose", dest="verbose", default=False, action="store_true")
     (options, args) = parser.parse_args()
-    
+
     if not args or sys.stdin.isatty():
         parser.print_usage()
         sys.exit(1)
-    
+
     try:
         sample_rate = get_sample_rate(sys.argv[-1])
-    except ValueError, e:
-        print >>sys.stderr, e
+    except ValueError as e:
+        print(e, file=sys.stderr)
         parser.print_usage()
         sys.exit(1)
     if options.verbose:
-        print >>sys.stderr, "Sample rate is %d%%" % sample_rate 
+        print("Sample rate is %d%%" % sample_rate, file=sys.stderr)
     run(sample_rate)