kafka-stream.py
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from elasticsearch import Elasticsearch
from kafka import KafkaProducer
import json
import secrets  # local secrets.py defining consumer_key, consumer_secret, access_token, access_token_secret (and es_host_name for Elasticsearch)

# es = Elasticsearch([secrets.es_host_name])

# Single Kafka producer shared by the listener; serializes payloads to JSON bytes.
producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    value_serializer=lambda data: json.dumps(data).encode('utf-8')
)

# This is a basic listener that prints received tweets to stdout
# and forwards them to the 'test' Kafka topic.
class StdOutListener(StreamListener):

    def on_data(self, data):
        tweet = json.loads(data)
        print(tweet)
        print("sending tweet to Kafka")
        producer.send('test', {"tweet": data})
        producer.flush()
        # res = es.index(index="streaming-tweets", doc_type='tweets', id=None, body=tweet)
        # print(res['created'])
        return True

    def on_error(self, status):
        print(status)


if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter Streaming API.
    l = StdOutListener()
    auth = OAuthHandler(secrets.consumer_key, secrets.consumer_secret)
    auth.set_access_token(secrets.access_token, secrets.access_token_secret)
    stream = Stream(auth, l)
    # Filter the Twitter stream to capture tweets containing the keyword '#analytics'.
    stream.filter(track=['#analytics'])
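A minimal consumer sketch, not part of the original script, showing how the tweets pushed to the 'test' topic might be read back with kafka-python. It assumes the same local broker at localhost:9092 and the JSON serialization used by the producer above; the topic name and offset policy are illustrative.

from kafka import KafkaConsumer
import json

# Subscribe to the 'test' topic and decode each message back from JSON bytes.
consumer = KafkaConsumer(
    'test',
    bootstrap_servers='localhost:9092',
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
    auto_offset_reset='earliest'
)

for message in consumer:
    payload = message.value  # {"tweet": <raw tweet JSON string>} as sent by the producer
    print(payload['tweet'])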