-
Notifications
You must be signed in to change notification settings - Fork 0
/
obituarybot.py
145 lines (129 loc) · 4.14 KB
/
obituarybot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import twitter
import nltk
import wikipedia as w
import random
import re
# Month names used to build "Deaths in <Month> <Year>" Wikipedia page titles
# (monthly death lists exist only for years after 2003 — see random_death_list).
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
def unparse(sentence):
    """Join a POS-tagged sentence back into readable text.

    Attaches ',' and ':' tokens to the preceding word, joins the rest with
    spaces, and strips a trailing period together with surrounding whitespace.

    Fixes vs. original: the old code deleted from the list while iterating it
    with enumerate (skipping tokens after each deletion), left a trailing
    space behind when the '.' was its own token, and raised IndexError on an
    empty sentence.
    """
    words = []
    for token in sentence:
        word = token[0]
        # Fold punctuation onto the previous word instead of mutating the
        # list mid-iteration.
        if word in (',', ':') and words:
            words[-1] += word
        else:
            words.append(word)
    text = " ".join(words).rstrip()
    if text.endswith('.'):
        text = text[:-1].rstrip()
    return text
def preprocess_sentences(sentences):
    """Tokenize each raw sentence string and POS-tag the resulting tokens.

    Returns a list of tagged sentences, one list of (word, tag) pairs per
    input sentence.
    """
    return [nltk.pos_tag(nltk.word_tokenize(sent)) for sent in sentences]
def rhyme(inp, level):
    """Return the set of CMU-dict words whose final ``level`` phones match
    some pronunciation of ``inp``.

    Fix vs. original: the old code rescanned the entire entries list once per
    pronunciation of ``inp`` (accidental O(n*m)); this collects the phone
    tails first and makes a single pass. The returned set is identical
    (``inp`` itself is still included when its own tail matches).
    """
    entries = nltk.corpus.cmudict.entries()
    # All phone tails of every pronunciation of `inp`, gathered once.
    tails = {tuple(syl[-level:]) for word, syl in entries if word == inp}
    return {word for word, pron in entries if tuple(pron[-level:]) in tails}
def np_chunk(sentence):
    """Parse a tagged sentence into NP chunks, chinking out coordinating
    conjunctions (the grammar grabs everything, then excises <CC> tokens)."""
    chunker = nltk.RegexpParser("""NP:
{<.*>+}
}<CC>{""")
    return chunker.parse(sentence)
def get_rhymes(target, sentences, level=2):
    """Return tagged sentences whose second-to-last token (the word before
    the final '.') rhymes with the last word of ``target``.

    Tries a rhyme match on the last ``level`` phones first; if nothing
    matches, relaxes to ``level - 1``.

    Fix vs. original: the old filter compared the candidate word against
    ``target`` (a list of (word, tag) tuples), which is never equal to a
    string, so the "don't rhyme with itself" check silently never fired.
    It now compares against the target's actual last word. The duplicated
    scan loop is factored into a helper.
    """
    last_word = target[-1][0]
    print(last_word)

    def _matching(rhymes):
        # Candidate sentences: skip the first (it supplied `target`), need at
        # least two tokens, and must not end on the target word itself.
        return [s for s in sentences[1:]
                if len(s) > 1 and s[-2][0] in rhymes and s[-2][0] != last_word]

    result = _matching(rhyme(last_word, level))
    if result:
        return result
    return _matching(rhyme(last_word, level - 1))
def get_poem(names):
    """Try each Wikipedia page name until a tweetable rhyming poem results.

    For each name: fetch the article, take the first noun-phrase chunk of the
    opening sentence, find a later sentence that rhymes with it, and glue the
    two together with a closing "They died" line. Returns the poem string
    (<= 140 chars, or with the last line dropped when slightly over), or
    None when every candidate fails.

    Changes vs. original: Python 3 print(), raw regex strings, truthiness
    checks instead of ``len(...) == 0``.
    """
    for name in names:
        try:
            text = w.page(name).content
        except (w.exceptions.PageError, w.exceptions.DisambiguationError):
            continue
        raw_sentences = nltk.sent_tokenize(text)
        sentences = preprocess_sentences(raw_sentences)
        # First NP chunk of the opening sentence, minus any trailing period.
        first = np_chunk(sentences[0])[0]
        if first[-1][0] == '.':
            first = first[:-1]
        rhymes = get_rhymes(first, sentences)
        if not rhymes:
            continue
        rhymes.sort(key=len)  # prefer the shortest rhyming sentence
        first = unparse(first)
        first = first.replace(' is ', ' was ')  # obituaries speak in the past tense
        poem = first + "\n" + unparse(rhymes[0]) + "\n" + "They died"
        poem = re.sub(r'\(([^)]+)\)', '', poem)  # drop parentheticals
        poem = re.sub(r' +', ' ', poem)          # collapse leftover spaces
        if len(poem) <= 140:
            return poem
        elif len(poem) <= 149:
            # Drop the trailing "\nThey died" (exactly 10 chars) to fit.
            return poem[:-10]
        else:
            print("too long")
            print(poem)
            continue
    else:
        # Every candidate name failed to produce a poem.
        return None
# Module-level Twitter client used by post_tweet.
# NOTE(review): all four credentials are blank — they must be filled in before
# posting works; consider loading them from environment variables instead of
# hard-coding secrets in source.
api = twitter.Api(consumer_key='',
consumer_secret='',
access_token_key='',
access_token_secret='')
def post_tweet(tweet):
    """Print the tweet, then post it via the module-level Twitter client.

    Twitter API errors are reported and swallowed (best-effort posting).

    Fix vs. original: the old handler printed ``twitter.error.TwitterError``
    — the exception *class* — instead of the caught error instance, so the
    actual failure reason was never shown.
    """
    print(tweet)
    try:
        status = api.PostUpdate(tweet)
        print(status)
    except twitter.error.TwitterError as err:
        print(err)
def create_obit(possible_people):
    """Build an obituary poem from the first workable name in the list."""
    poem = get_poem(possible_people)
    return poem
def random_death_list():
    """Pick a random Wikipedia "Deaths in ..." page title.

    Years through 2003 use the yearly list; later years use a randomly
    chosen monthly list.
    """
    year = random.randint(1987, 2015)
    if year <= 2003:
        return "Deaths in " + str(year)
    return "Deaths in " + random.choice(months) + " " + str(year)
def get_people(n=15):
    """Collect roughly ``n`` person names from a random Wikipedia death list.

    A link on the list page is taken to be a person when its first occurrence
    in the page text is immediately followed by a comma (the list format is
    "Name, description").

    Fixes vs. original: ``for i in n // 25`` tried to iterate an int and
    raised TypeError whenever n > 25 (should be ``range``); the Wikipedia
    page was fetched twice (once for links, once for content); and when the
    link list was exhausted before reaching n the function fell off the end
    and returned None instead of the partial list.
    """
    if n > 25:
        people = []
        for _ in range(n // 25):
            people.extend(get_people(25))
        return people
    page = w.page(random_death_list())  # fetch once, reuse links and content
    links = list(page.links)
    content = page.content
    people = []
    random.shuffle(links)
    for person in links:
        loc = content.find(person)
        # Keep names that appear followed by a comma (list-entry format).
        if loc > 0 and content[loc:loc + len(person) + 1][-1] == ',':
            people.append(person)
        if len(people) > n:
            return people
    return people
def main():
    """Generate one obituary poem and tweet it, recursing until one succeeds.

    Fixes vs. original: the bare ``except:`` also caught KeyboardInterrupt
    and SystemExit, making the bot hard to stop — narrowed to ``Exception``
    (still a single best-effort retry, then the error propagates);
    ``== None`` replaced with the idiomatic ``is None``.
    """
    people = get_people(25)
    try:
        obit = create_obit(people)
    except Exception:
        # One retry with the same candidate list; a second failure propagates.
        obit = create_obit(people)
    if obit is None:
        main()
    else:
        post_tweet(obit)
if __name__ == "__main__":
    # Tweet obituaries forever. Guarded so importing this module (e.g. for
    # testing) no longer kicks off the unguarded infinite loop.
    while True:
        main()