-
Notifications
You must be signed in to change notification settings - Fork 0
/
Twitter_streaming1.py
101 lines (81 loc) · 3.54 KB
/
Twitter_streaming1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import os
import re
import time

from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Twitter API credentials.  Each value is taken from the matching TWITTER_*
# environment variable when it is set; otherwise the literal below is used so
# existing invocations keep working.
# NOTE(review): the fallback literals are secrets committed to source control.
# They should be revoked/rotated and the fallbacks removed once the
# environment variables are configured.
access_token = os.environ.get(
    "TWITTER_ACCESS_TOKEN",
    "1322512952-13U0KQWflirRj9TGzwjQeEMpsi8N9vbY7aYyNP5",
)
access_token_secret = os.environ.get(
    "TWITTER_ACCESS_TOKEN_SECRET",
    "T7dQcdOsgWhdaoxNnvT6ZjACe5QI0QZD3ULyRUaZJDCrq",
)
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY",
    "pCe5mSXcFrO37P8qUyHFTJn2h",
)
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "wt6Jp8rrh6jdDbV1AlpTikl2guYoVUe8bBVST8q9UdvCwZzeZC",
)

# Ask the user for the keyword to track.  ``raw_input`` only exists on
# Python 2; on Python 3 the equivalent builtin is ``input``.
try:
    _read_line = raw_input
except NameError:
    _read_line = input
var = _read_line("Please enter keyword: ")
# A single pre-formatted argument prints the same text on Python 2 and 3.
print("you entered %s" % var)

# Shared state read and updated by StdOutListener.
words = {}            # word -> [score, time of last insert/update]
time1 = time.time()   # start of the current 30-second window
time2 = time1         # reference time used when ageing cache entries
N = 100               # maximum number of words kept in the cache
flag = 0              # 30-second windows elapsed since the last report
class StdOutListener(StreamListener):
    """Stream listener that keeps a bounded cache of scored words from tweets.

    All state lives in module-level globals: ``words`` maps each word to
    ``[score, time of last insert/update]``, ``time1``/``time2`` hold the
    start of the current 30-second window, ``flag`` counts the windows
    elapsed since the last report, and ``N`` caps the number of cached words.
    """

    def score(self):
        """Age the cache.

        Every entry that was not inserted or updated during the 60 seconds
        before ``time2`` loses one point; entries whose score drops below
        zero are removed.
        """
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, and on Python 3 ``dict.keys()`` is a live view that must not
        # change size while it is being iterated.
        for key in list(words):
            entry = words[key]
            if time2 - entry[1] >= 60:
                entry[0] -= 1
            if entry[0] < 0:
                del words[key]

    def _evict_zero_scores(self):
        """Remove every cached word whose score is exactly zero."""
        for key in list(words):
            if words[key][0] == 0:
                del words[key]

    def process_data(self, data):
        """Fold the words of one tweet into the cache and report periodically.

        ``data`` is the tweet text; it is split on whitespace.  A word that
        is already cached gets its score incremented and its timestamp
        refreshed.  A new word is inserted with score 1 if there is room, or
        if room can be made by evicting zero-score entries.  Once 30 seconds
        have passed since ``time1`` the remaining words are dropped, the
        cache is aged via ``score()`` and ``flag`` is incremented; after two
        such windows every word with a score above 1 is printed.
        """
        global time1
        global time2
        global flag

        for word in data.split():
            now = time.time()
            if now - time1 >= 30:
                # The window is over; the ageing step below takes priority.
                break

            entry = words.get(word)
            if entry is not None:
                entry[0] += 1
                entry[1] = now
                continue

            if len(words) >= N:
                self._evict_zero_scores()
                if len(words) >= N:
                    # Still full: skip only this new word.  Stopping the whole
                    # loop here would also stop score updates for words that
                    # are already cached and appear later in the tweet.
                    continue
            words[word] = [1, now]

        now = time.time()
        if now - time1 >= 30:
            time1 = now
            time2 = now
            flag += 1
            self.score()

        # ``>=`` rather than ``==``: if a report was ever missed, ``flag``
        # must not be allowed to run past 2 and disable reporting for good.
        if flag >= 2:
            for word, entry in words.items():
                if entry[0] > 1:
                    # One pre-formatted argument prints identically on
                    # Python 2 and Python 3.
                    print("%s %s" % (word, entry[0]))
            flag = 0
            print('wait for 1 minute to get words in cache')

    def on_data(self, raw_data):
        """Decode one raw stream message and process its tweet text, if any.

        Messages without a ``"text"`` key are ignored.  Always returns True.
        """
        data = json.loads(raw_data)
        if "text" in data:
            self.process_data(data["text"])
        return True

    def on_error(self, status):
        """Print the error status reported by the stream and return True."""
        print(status)
        return True
if __name__ == '__main__':
    # Build the listener, then authenticate against the Twitter API with the
    # module-level credentials.
    tweet_listener = StdOutListener()
    oauth = OAuthHandler(consumer_key, consumer_secret)
    oauth.set_access_token(access_token, access_token_secret)

    # Open the streaming connection, filtered on the keyword the user entered.
    Stream(oauth, tweet_listener).filter(track=[var])