-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrest.py
79 lines (64 loc) · 2.99 KB
/
rest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Input: screen name of account
# Output: up to 200 most recent retweets of the account
from __future__ import absolute_import, print_function
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import tweepy
import json
import pickle
import codecs
from tweepy.utils import parse_datetime
# Step 1: Authentication
# Go to http://apps.twitter.com and create an app.
# Copy and paste the keys and tokens below
consumer_key="f5vfC0cTaHA6AKLOwQHtdImOX"
consumer_secret="a3skaiwaheh7jbjbgOJyu3yMRVnqJQPxsH7OfZi7ZAZVMEoNuT"
access_token="971917970829185024-T6gI59cPkBeNfGWYWAJynIvlzUsA2Lc"
access_token_secret="5Z1juX40eKwt9gtVLDfdcvXesKSW8O4zCbOmJun0JBYoh"
# Step 2: Provide Input
# Provide the screen_name (id) of the Twitter Account you want to scrape
screen_name = 'realDonaldTrump'
# ****** Ready to Run *********
auth = OAuthHandler(consumer_key, consumer_secret)
auth.secure = True
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, parser=tweepy.parsers.JSONParser(), retry_count=5)
rate = api.rate_limit_status()
rate_follower_IDs = rate['resources']['followers']['/followers/ids']['limit'] * 4 # requests per hour
rate_follower_profiles = rate['resources']['users']['/users/lookup']['limit'] * 4 # requests per hour
# doc explaning each field https://dev.twitter.com/overview/api/tweets
tweet_cols = ['id_str', 'user.screen_name', 'created_at', 'retweeted_status.id_str',
'retweeted_status.user.id_str', 'retweeted_status.user.screen_name',
'retweeted_status.favorite_count', 'favorite_count', 'retweet_count', 'text']
## Print a list of dict variable into csv file
# vals: a list, each element is a dict
# cols: the fields (keys) to be retrieved in each dict
def printList(vals, cols, file_name):
f = codecs.open(file_name, 'w', 'utf-8')
f.writelines('\t'.join(cols)+'\n')
for v in vals:
if v==None: continue
fields = [getDictField(v, e) for e in cols]
f.writelines('\t'.join(fields)+'\n')
f.close()
## Access an element in a nested dict variable by key
# d: a dict
# field: the field to be retrieved. seperated by '.' if nested
def getDictField(d, field):
names = field.split('.')
for name in names:
if name in d: d = d[name]
try:
if "created_at" in field: d = tweepy.utils.parse_datetime(d)
if 'text' in field: d = d.replace('\r', ' ').replace('\n', ' ').replace('\t', ' ')
return str(d)
except:
print("@@@Exception while extracting the field %s from the following object" % field)
print(d)
return "fail_to_convert_to_string"
if __name__ == '__main__':
# Scrape the most recent N (up to 200) tweets authored by the given id
tweets = api.user_timeline(screen_name=screen_name, count=200)
printList(tweets, tweet_cols, screen_name+'.tsv')
print('Done!')