-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsocrata-twitterbot-csv-memory.py
115 lines (79 loc) · 3.49 KB
/
socrata-twitterbot-csv-memory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# coding: utf-8
# In[ ]:
#import the necessary libraries
import csv, requests, datetime, time
import simplejson as json
import pandas as pd
from twython import Twython
# In[ ]:
#change this to the SOCRATA portal you want to target, don't forget the trailing slash
targeturl = 'http://chhs.data.ca.gov/'
# In[ ]:
#build the data.json url string according to SOCRATA's convention
#the timeout keeps an unresponsive portal from hanging the bot forever
r = requests.get(targeturl + "api/dcat.json", timeout=30)
# In[ ]:
#stop if something went wrong; this check runs BEFORE the body is parsed because
#an error response is not guaranteed to contain JSON
#SystemExit is raised directly so no extra import is needed to terminate the script
if r.status_code != 200:
    raise SystemExit("failed to fetch json data, http return code " + str(r.status_code))
print("\nsuccessfully fetched json data, http return code 200")
# In[ ]:
#parse the json response into j; the update loop further down iterates j and
#reads the 'identifier', 'title', 'created' and 'modified' keys of each entry
j = r.json()
# In[ ]:
#authenticate with your own twitter application tokens below
#NOTE(review): APP_KEY, APP_SECRET, OAUTH_TOKEN and OAUTH_TOKEN_SECRET are not assigned
#anywhere in the visible part of this file - they must be defined before this line runs
#the resulting client is the object tweet_it() calls update_status() on
twitter = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
# In[ ]:
#function that truncates long titles and sends the tweet
def tweet_it(identifier, title, max_title_len=89):
    """Tweet an "Updated data" announcement that links to one dataset.

    identifier    -- dataset id; appended to targeturl + "d/" to build the link
    title         -- dataset title; cut down to max_title_len characters
    max_title_len -- longest title kept in the tweet. The default of 89 is tuned
                     to the current wording and needs adjusting if the wording
                     of the status text is changed.

    Sends the status through the module-level `twitter` client and then pauses,
    so any error raised by the client propagates to the caller.
    """
    title = title[:max_title_len]
    x = "Updated data \"" + title + "\" " + targeturl + "d/" + identifier
    #for debugging; built as ONE string so the label is really printed
    #(the old `print (x), "..."` form silently dropped the label on Python 3)
    print(x + " debug tweet sent")
    twitter.update_status(status=x)  #send the tweet
    time.sleep(2)  #wait 2 seconds between tweets, this can be adjusted
# In[ ]:
#reference moment shared by every date comparison made during this run
today = datetime.datetime.today()
#threshold, in days, that the update loop compares against the time elapsed since
#a dataset's last tweet; it keeps a dataset that is modified EVERY DAY from being
#tweeted again right away
threshold = 7
# In[ ]:
#load the memory file written by a previous run, indexed by its first column
#if memory.csv does not exist yet this line raises an error - create the file
#first by using the code included
memory = pd.read_csv('memory.csv', index_col=0)
print(memory)  #debug
# In[ ]:
#walk every entry of the fetched catalog, tweet the ones that qualify and keep
#the memory table in sync
today_str = today.strftime('%Y-%m-%d')
for i in j:
    dataset_id = i['identifier']
    #only 9-character identifiers are processed, every other entry is skipped
    if len(dataset_id) != 9:
        continue
    if dataset_id in memory.index:  #known to us
        m_dt = datetime.datetime.strptime(memory.loc[dataset_id, 'modified'], '%Y-%m-%d')
        l_dt = datetime.datetime.strptime(memory.loc[dataset_id, 'last_tweeted'], '%Y-%m-%d')
        #>= so that threshold=7 really allows one tweet every 7 days (a strict >
        #would only allow one every 8 days)
        delta = (today - l_dt).days >= threshold
        #NOTE(review): m_dt is the modified date stored in memory by an earlier run,
        #not the 'modified' value of the current catalog entry - confirm this is intended
        valid = (today - m_dt).days <= 1
        if delta and valid:
            print("valid update")
            tweet_it(dataset_id, i['title'])
            #single .loc[row, column] assignment: the chained form
            #memory.loc[row][column] = ... may write to a temporary copy and be lost
            memory.loc[dataset_id, 'last_tweeted'] = today_str
        else:
            print("known but not changed or not old enough")
        #both outcomes refresh the record's modified date
        memory.loc[dataset_id, 'modified'] = i['modified']
    else:
        print(dataset_id + " is new, not known to memory")
        try:
            tweet_it(dataset_id, i['title'])
            new_record = pd.DataFrame(
                [[dataset_id, i['created'], i['modified'], today_str]],
                columns=['id', 'created', 'modified', 'last_tweeted'],
            ).set_index('id')
            #pd.concat replaces DataFrame.append, which was removed in pandas 2.0
            memory = pd.concat([memory, new_record])
            print(new_record)
        except Exception as err:
            #still best effort - one failing dataset must not stop the run - but the
            #failure is reported instead of being silently swallowed; the dataset
            #stays unknown to memory and is tried again on the next run
            print("could not tweet or record " + dataset_id + ": " + repr(err))
# In[ ]:
#create updated memory file after today's changes
memory.to_csv('memory.csv')