-
Notifications
You must be signed in to change notification settings - Fork 1
/
gyear.py
74 lines (61 loc) · 1.87 KB
/
gyear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import sqlite3
import time
import zlib
# Load the Senders and Messages tables from index.sqlite into memory so the
# rest of the script can work from plain dictionaries.
conn = sqlite3.connect('index.sqlite')
try:
    cur = conn.cursor()

    # senders: Senders.id -> Senders.sender
    cur.execute('SELECT id, sender FROM Senders')
    senders = dict()
    for row_id, address in cur:
        senders[row_id] = address

    # messages: Messages.id -> (guid, sender_id, subject_id, sent_at)
    cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
    messages = dict()
    for row_id, guid, sender_id, subject_id, sent_at in cur:
        messages[row_id] = (guid, sender_id, subject_id, sent_at)
finally:
    # Both tables are fully copied into memory at this point, so release the
    # database handle (also when one of the queries raised).
    conn.close()

print("Loaded messages=",len(messages),"senders=",len(senders))
# Tally how many messages were sent from each e-mail domain.
sendorgs = {}
for record in messages.values():
    # record[1] is the sender id; look up the sender string and split it
    # around "@".
    address_parts = senders[record[1]].split("@")
    # Only strings with exactly one "@" are counted; everything else is
    # skipped.
    if len(address_parts) == 2:
        domain = address_parts[1]
        sendorgs[domain] = sendorgs.get(domain, 0) + 1

# Keep the ten domains with the highest message counts, largest first.
orgs = sorted(sendorgs, key=sendorgs.get, reverse=True)[:10]
print("Top 10 Organizations")
print(orgs)
# orgs = ['total'] + orgs
# Count messages per (year, organization) for the top organizations, plus a
# per-year 'total' across those organizations.
counts = dict()
top_orgs = set(orgs)    # constant-time membership test inside the loop
seen_years = set()
for message in messages.values():
    sender = message[1]
    pieces = senders[sender].split("@")
    if len(pieces) != 2:
        continue
    dns = pieces[1]
    if dns not in top_orgs:
        continue
    # message[3] is sent_at; its first four characters are used as the year
    # (presumably an ISO-style timestamp string - confirm against the loader
    # that fills the Messages table).
    year = message[3][:4]
    seen_years.add(year)
    # The writer further down looks up counts[(year, org)] for every top
    # organization, so the per-organization tally has to be recorded here;
    # without it every cell in gline.js comes out as 0.
    key = (year, dns)
    counts[key] = counts.get(key, 0) + 1
    tkey = (year, 'total')
    counts[tkey] = counts.get(tkey, 0) + 1

# Distinct years in ascending (string) order.
years = sorted(seen_years)
print(counts)
print(years)
# Write the table to gline.js as a JavaScript array literal named `gline`:
# a header row ['Year', org1, org2, ...] followed by one row per year holding
# that year's message count for each organization (0 when there is none).
# The `with` block guarantees the file is closed even if a write fails.
with open('gline.js','w') as fhand:
    header_cells = "".join(",'" + org + "'" for org in orgs)
    fhand.write("gline = [ ['Year'" + header_cells + "]")

    for year in years:
        value_cells = "".join(
            "," + str(counts.get((year, org), 0)) for org in orgs
        )
        fhand.write(",\n['" + year + "'" + value_cells + "]")

    fhand.write("\n];\n")

print("Output written to gline.js")
print("Open gline.htm to visualize the data")