This repository has been archived by the owner on Dec 17, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
snapshot_mailman.py
executable file
·69 lines (64 loc) · 2.94 KB
/
snapshot_mailman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
import argparse
from lxml import html
import requests
import os
from lib import util
from lib.backend import Session
from lib.backend.model import SnapshotOfMailman, ActivityInMailman
from datetime import datetime,timedelta
def snapshot_mailman(verbose=False):
    """Store one SnapshotOfMailman row per list for every day not yet covered.

    For each list returned by ``util.list_mailman_lists``, look up the most
    recent stored snapshot and add a snapshot for every day after it, up to
    (but not including) today.  A list with no snapshot yet is back-filled
    180 days.

    Each snapshot holds the number of ActivityInMailman rows for that day and
    the list's subscriber count.  The roster is scraped once per list per
    run, so every back-filled day receives the *current* subscriber count.

    verbose -- when true, print progress to stdout.

    Raises ValueError (from _scrape_subscribers) if the roster login fails.
    """
    lists = util.list_mailman_lists(verbose)
    today = datetime.now().date()
    one_day = timedelta(days=1)
    for mailing_list in lists:
        name = mailing_list['name']
        if verbose:
            print('Processing snapshots for %s...' % name)
        latest = (
            Session.query(SnapshotOfMailman)
            .filter(SnapshotOfMailman.list_name == name)
            .order_by(SnapshotOfMailman.timestamp.desc())
            .first()
        )
        # By default, gather 180 days of snapshots
        since = today - timedelta(days=180)
        if latest:
            since = latest.timestamp + one_day
        # Snapshots are only written for days strictly before today, so an
        # up-to-date list has its latest snapshot dated yesterday.  Bail out
        # here, before the roster is downloaded, when no day is missing.
        if since >= today:
            if verbose:
                print(' -> most recent snapshots have already been processed.')
            continue
        # Download subscriber list
        roster_url = mailing_list['link'].replace('listinfo', 'roster')
        num_subscribers = len(_scrape_subscribers(roster_url, verbose=verbose))
        # Create a snapshot of each day
        while since < today:
            next_day = since + one_day
            # Half-open range [since, next_day): SQL BETWEEN is inclusive at
            # both ends, so a post stamped exactly on the boundary would be
            # counted for two consecutive days.
            posts_today = (
                Session.query(ActivityInMailman)
                .filter(ActivityInMailman.list_name == name)
                .filter(ActivityInMailman.timestamp >= since)
                .filter(ActivityInMailman.timestamp < next_day)
                .count()
            )
            sn = SnapshotOfMailman(
                list_name=name,
                timestamp=since,
                subscribers=num_subscribers,
                posts_today=posts_today)
            Session.add(sn)
            if verbose:
                print(' ->  %s' % (sn.toJson(),))
            since = next_day
        # Persist this list's snapshots before moving on to the next list.
        Session.commit()
def _scrape_subscribers(url, verbose=False):
    """Log in to a list's roster page and collect its subscriber links.

    url -- address of the roster page to POST the credentials to.
    verbose -- when true, print progress to stdout.

    Returns a dict mapping link text to href for every anchor on the page
    whose href contains '--at--'.

    Raises ValueError if the response reports that roster authentication
    failed.
    """
    # [email protected] can access list rosters
    # NOTE(review): the address above and below looks redacted in this copy
    # of the file -- confirm the real account before running.
    # The password is read from the MAILMAN_ADMIN_PW environment variable.
    payload = {
        'roster-email': '[email protected]',
        'roster-pw': os.environ.get('MAILMAN_ADMIN_PW'),
    }
    if verbose:
        print('Scraping subscriber list for %s...' % url)
    r = requests.post(url, data=payload)
    # Did we get in?
    if 'roster authentication failed' in r.text:
        raise ValueError('Roster authentication failed. Bad password.')
    # Scrape all the links to email--at--domain.com
    tree = html.fromstring(r.text)
    # Anchors without an href (e.g. named anchors) are skipped via .get()
    # rather than raising KeyError.  text_content() must be *called*:
    # the bare attribute is a bound method, not the link text.
    return {
        anchor.text_content(): anchor.get('href')
        for anchor in tree.cssselect('a')
        if '--at--' in anchor.get('href', '')
    }
if __name__ == '__main__':
    # Script entry point: the only command-line option is a verbosity flag,
    # which is passed straight through to snapshot_mailman().
    cli = argparse.ArgumentParser(
        description='Daily snapshot of Mailman activity.',
    )
    cli.add_argument(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        help='Verbose output',
    )
    options = cli.parse_args()
    snapshot_mailman(verbose=options.verbose)