# DataCollection.py
# -*- coding: utf-8 -*-
"""
@author: Sayali
"""
import json
import os
import time

import tweepy
# Twitter OAuth credentials are read from the environment instead of being
# hard-coded: secrets committed to source control are readable by anyone with
# access to the repository. Any credentials that were previously committed in
# this file should be treated as compromised and revoked.
#
# Expected environment variables:
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN_KEY, TWITTER_ACCESS_TOKEN_SECRET
#
# A missing variable yields an empty string so importing this module never
# fails; NOTE(review): API calls are then expected to be rejected by Twitter.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token_key = os.environ.get('TWITTER_ACCESS_TOKEN_KEY', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

# Shared OAuth handler (used by the streaming collector) and REST API client
# (used by the search collector).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)
myApi = tweepy.API(auth)
class StreamListener(tweepy.StreamListener):
    """Stream listener that appends every received payload to a text file.

    Each payload is parsed as JSON and re-serialised onto a single line of
    ``streamData1.txt``, giving one JSON document per line.
    """

    def on_data(self, raw_data):
        """Persist one raw payload handed to the listener.

        Args:
            raw_data: JSON text of a single message.

        Returns:
            None. A failure to parse or write is reported on stdout and
            otherwise ignored, so one bad payload does not abort collection.
        """
        try:
            jdata = json.loads(str(raw_data))
            # ``with`` closes the handle even if the write itself fails.
            with open("streamData1.txt", "a+") as outputFile:
                outputFile.write(json.dumps(jdata) + "\n")
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the collector.
            print('Data writting exception.')
def CollectStreamData(retry_delay=5):
    """Collect tweets matching 'asthma' from the streaming API until interrupted.

    A fresh listener and stream are created on every pass; whenever the
    stream stops (normally or through an error) the connection is
    re-established after a short pause.

    Args:
        retry_delay: Seconds to wait before reconnecting. The pause keeps a
            persistent failure from turning into a tight reconnect loop.

    Returns:
        Never returns normally; stop it with Ctrl-C (KeyboardInterrupt).
    """
    while True:
        sl = StreamListener()
        stream = tweepy.Stream(auth, sl)
        try:
            stream.filter(track=['asthma'])
        except Exception:
            # ``except Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit can break out of the loop.
            print('Exception occur!')
        time.sleep(retry_delay)
def CollectRestData():
    """Page backwards through search results for 'asthma' near NYC.

    Every tweet's raw JSON is appended to ``restData1.txt``, one document
    per line. Paging starts at the most recent results and walks towards
    older tweets until a search returns nothing.

    When a search raises ``tweepy.TweepError`` the function waits 15 minutes
    and retries the same page; after three consecutive failures it gives up
    so a permanent error cannot stall the script forever.

    Returns:
        None.
    """
    query = "asthma"
    GEO = "40.7127750,-74.0059730,30mi"  # NYC
    max_failures = 3
    failures = 0
    max_id = None  # None -> start from the most recent tweets

    # ``with`` guarantees the output file is flushed and closed on any exit.
    with open("restData1.txt", "a+") as outputFile:
        while True:
            params = {'q': query, 'geocode': GEO, 'count': 100}
            if max_id is not None:
                params['max_id'] = max_id

            try:
                tweets = myApi.search(**params)
            except tweepy.TweepError:
                failures += 1
                if failures >= max_failures:
                    print('giving up after %d consecutive failures' % failures)
                    break
                print('exception raised, waiting for 15 minutes')
                time.sleep(15 * 60)
                continue  # retry the same page after the wait

            failures = 0
            if not tweets:
                # No older tweets left (also covers an empty first page).
                break

            for tweet in tweets:
                outputFile.write(json.dumps(tweet._json) + "\n")

            last_id = tweets[-1].id
            print('%s %s' % (last_id, len(tweets)))
            # max_id is inclusive, so step below the oldest tweet already
            # written; otherwise it is fetched again on every page and the
            # final page never comes back empty.
            max_id = last_id - 1
if __name__ == "__main__":
    # Script entry point. By default tweets are gathered through the REST
    # search API; to gather them through the streaming API instead, call
    # CollectStreamData() here in place of CollectRestData().
    CollectRestData()