-
Notifications
You must be signed in to change notification settings - Fork 0
/
simons2.py
executable file
·45 lines (41 loc) · 1.12 KB
/
simons2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
from __future__ import division
import sys
import os
import re
import argparse
# usage example
# cat casa.txt | ./simons.py | sort | uniq -c | awk -F" " '{print $1}' | awk '{s+=$1} END {print $1/s}'
def is_valid_file(parser, arg):
    """Validate an input-file command-line argument for argparse.

    Meant to be used as an argparse ``type=`` callback.

    Args:
        parser: object exposing an ``error(message)`` method (normally the
            ``argparse.ArgumentParser`` that owns the option).
        arg: path given on the command line, absolute or relative.

    Returns:
        The absolute path of ``arg`` when it names an existing regular file.
        Otherwise ``parser.error`` is called; a real ArgumentParser exits
        the process there, and if ``error`` returns, None is returned.
    """
    path = os.path.abspath(arg)
    if not os.path.exists(path):
        parser.error("The file %s does not exist!" % path)
    elif not os.path.isfile(path):
        # A directory (or other non-regular entry) exists but cannot be
        # read as text; reject it here instead of crashing later in open().
        parser.error("%s is not a regular file!" % path)
    else:
        return path
def frequency_of_frequencies(lines):
    """Compute the frequency-of-frequencies histogram of the words in ``lines``.

    Words are the whitespace-separated tokens of each line, compared
    case-insensitively (lower-cased).

    Args:
        lines: iterable of strings (for example an open text file).

    Returns:
        A list ``Nf`` where ``Nf[k]`` is the number of distinct words that
        occur exactly ``k + 1`` times. Empty input yields an empty list.
    """
    wordfreq = {}
    Nf = []
    for line in lines:
        # split() with no arguments already discards surrounding whitespace,
        # so the tokens need no further stripping.
        for word in line.split():
            word = word.lower()
            count = wordfreq.get(word, 0) + 1
            wordfreq[word] = count
            # A count can exceed the histogram length by at most one,
            # because each step raises a single word's count by one.
            if count > len(Nf):
                Nf.append(0)
            # The word moves from the (count - 1) bucket to the count bucket.
            Nf[count - 1] += 1
            if count > 1:
                Nf[count - 2] -= 1
    return Nf


def main():
    """Read text from the ``-i FILE`` option or stdin and print the histogram."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="filename", required=False, help="input text file", metavar="FILE", type=lambda x: is_valid_file(parser, x))
    args = parser.parse_args()
    if args.filename:
        f = open(args.filename, "rt")
    else:
        f = sys.stdin
    try:
        Nf = frequency_of_frequencies(f)
    finally:
        # Close only what was opened here; stdin belongs to the caller.
        if f is not sys.stdin:
            f.close()
    # With a single argument the parenthesised form prints the same text
    # under the Python 2 print statement and the Python 3 print function.
    print(Nf)


if __name__ == "__main__":
    main()