-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy patht.py
191 lines (155 loc) · 7.01 KB
/
t.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python
# encoding: utf-8
"""
t.py
Created by Hilary Mason on 2010-04-25.
Copyright (c) 2010 Hilary Mason. All rights reserved.
"""
import sys, os
import re
from optparse import OptionParser
import pymongo
import tweepy
import settings
from lib import mongodb
from lib import display
class Twitter(object):
    """
    Twitter: load unread tweets from MongoDB, filter and rank them, and pass
    them to the display layer.

    Constructing an instance performs the whole run: settings are loaded,
    the 'tweets' database is connected, tweets are fetched/sorted/filtered
    and finally handed to display.Display().display_tweets().
    """

    # Pattern used by extract_links. Compiled once at class-definition time;
    # a raw string keeps the regex escapes intact on every Python version.
    _LINK_RE = re.compile(r"(http|https):\/\/(([a-z0-9\-]+\.)*([a-z]{2,5}))\/[\w|\/]+")

    def __init__(self, options):
        """
        __init__: run one fetch-and-display cycle.

        options -- parsed command-line options; the attributes `num`, `sort`
                   and `mark_read` are read. `num` is coerced with int(), so
                   both strings and integers are accepted.
        """
        self.settings = self.load_settings()
        self.db = mongodb.connect('tweets')
        tweets = self.load_tweets(int(options.num), sort=options.sort, mark_read=options.mark_read)
        display.Display().display_tweets(tweets)

    def load_tweets(self, num, sort='time', mark_read=True):
        """
        load_tweets: fetch unread tweets, order them, apply black/white lists.

        num       -- maximum number of tweets to return
        sort      -- 'antitime' (oldest first), 'rel' (see sort_by_relevance),
                     'inf' (see sort_by_influence); any other value means
                     newest first
        mark_read -- when true, set r=1 on every returned tweet in the db

        Returns the list of tweet documents. Each one carries two extra keys:
        '_display' (bool, whether it survived the lists) and
        '_display_topics' (topics whose score met their threshold).
        Side effect: links found in the tweets are written to the link cache.
        """
        if sort == 'antitime':  # sort by time, oldest first
            tweets = self._unread_tweets(pymongo.ASCENDING, limit=num)
        elif sort == 'rel':
            # ranking needs every unread tweet; truncation happens afterwards
            tweets = self.sort_by_relevance(self._unread_tweets(pymongo.ASCENDING), num=num)
        elif sort == 'inf':
            tweets = self.sort_by_influence(self._unread_tweets(pymongo.ASCENDING), num=num)
        else:  # sort by time, newest first
            tweets = self._unread_tweets(pymongo.DESCENDING, limit=num)

        # mark these tweets as 'read' in the db
        if mark_read:
            for t in tweets:
                self.db['tweets'].update({'_id': t['_id']}, {'$set': {'r': 1}})

        # black/white lists
        blacklist_users = self.settings['blacklist_users']
        whitelist_users = self.settings['whitelist_users']
        blacklist = self.settings['blacklist']
        for t in tweets:
            if t['user'] in blacklist_users:
                t['_display'] = False

            text = t['text'].lower()  # lower-case once, not once per pattern
            if any(blackword.search(text) for blackword in blacklist):
                t['_display'] = False

            t['_display_topics'] = self._topics_over_threshold(t)

            # the whitelist is checked last so it overrides both blacklists
            if t['user'] in whitelist_users:
                t['_display'] = True

        # cache any links in these tweets so I can get to them easily
        self.extract_links(tweets)

        return tweets

    def _unread_tweets(self, direction, limit=None):
        """
        _unread_tweets: query tweets that have no 'r' (read) field, sorted by
        'created_at' in `direction`, optionally limited to `limit` documents.
        Every returned tweet is flagged '_display' = True.
        """
        cursor = self.db['tweets'].find(spec={'r': {'$exists': False}}).sort('created_at', direction=direction)
        if limit is not None:
            cursor = cursor.limit(limit)

        tweets = []
        for t in cursor:
            t['_display'] = True  # mark all for display, so optimistic
            tweets.append(t)
        return tweets

    def _topics_over_threshold(self, tweet):
        """
        _topics_over_threshold: list the topics of `tweet` whose score is at
        least the configured threshold.

        A topic without an explicit entry in settings['topic_thresholds'] is
        compared against the 'default' entry, so one unknown topic no longer
        stops the remaining topics from being evaluated. A tweet without
        topic analysis yields an empty list.
        """
        topics = tweet.get('topics')
        if not topics:  # no topic analysis for this tweet
            return []

        thresholds = self.settings['topic_thresholds']
        fallback = thresholds.get('default')
        selected = []
        for topic, score in topics.items():
            threshold = thresholds.get(topic, fallback)
            if threshold is not None and score >= threshold:
                selected.append(topic)
        return selected

    def sort_by_influence(self, tweets, num):
        """
        sort_by_influence: sort tweets by klout score, highest first, and
        return the first `num`.

        Each tweet gets an 'influence' key taken from the 'klout_score' of the
        user document whose _id equals the tweet's 'author'. Tweets whose
        author has no user document or no score get influence 0 instead of
        raising KeyError during the sort.
        """
        # NOTE(review): this method reads t['author'] while load_tweets reads
        # t['user'] -- confirm both keys exist on tweet documents.
        for t in tweets:
            for k in self.db['users'].find(spec={'_id': t['author']}, fields={'klout_score': True}):
                t['influence'] = k.get('klout_score') or 0
            t.setdefault('influence', 0)  # no matching user document at all
        return sorted(tweets, key=lambda x: -x['influence'])[:num]

    def sort_by_relevance(self, tweets, num):
        """
        sort_by_relevance: sorts tweets by arbitrary relevance to me. Criteria:
        1) does it mention me?
        2) is it by someone on my whitelist?
        3) is it about a topic that I care about?
        4) everything else
        Within each group the incoming order is kept. Returns the first `num`
        tweets; every tweet also gets its '_display_topics' list populated.
        """
        mentions = []
        whitelist = []
        topical = []
        other = []

        whitelist_users = self.settings['whitelist_users']
        for t in tweets:
            t['_display_topics'] = self._topics_over_threshold(t)

            if settings.TWITTER_USERNAME in t['text']:
                mentions.append(t)
            elif t['user'] in whitelist_users:
                whitelist.append(t)
            elif t['_display_topics']:
                topical.append(t)
            else:
                other.append(t)

        return (mentions + whitelist + topical + other)[:num]

    def extract_links(self, tweets):
        """
        extract_links: pull links out of tweets and cache in a text file

        Only the first link matched in each tweet's text is kept. The cache
        file (settings['link_cache_filename']) is overwritten, one link per
        line, and is left untouched when no tweet contains a link.
        """
        links = []
        for t in tweets:
            match = self._LINK_RE.search(t['text'])
            if match:
                links.append(match.group(0))

        if links:
            # `with` closes the file even if a write fails part-way through
            with open(self.settings['link_cache_filename'], 'w') as f:
                for link in links:
                    f.write('%s\n' % link)

    def load_settings(self):
        """
        load_settings: build the settings dictionary.

        Keys: 'topic_thresholds' (topic -> minimum score),
        'link_cache_filename', 'whitelist_users', 'blacklist_users' (lists of
        names) and 'blacklist' (list of compiled regexes). The three lists are
        read from files of the same name in the current working directory; a
        missing or unreadable file gives an empty list.
        """
        # named `config` so the imported `settings` module is not shadowed
        config = {}
        config['topic_thresholds'] = {'default': .6, 'betaworks': 1.0, 'narcissism': .25, 'sports': .9999}
        config['link_cache_filename'] = 'link_cache'

        config['whitelist_users'] = self._read_lines('whitelist_users')
        config['blacklist_users'] = self._read_lines('blacklist_users')

        # Patterns are lower-cased because tweets are lower-cased before
        # matching. Blank lines are already dropped by _read_lines; an empty
        # pattern would match (and therefore hide) every tweet.
        # NOTE(review): lower-casing also turns escapes such as \W into \w.
        config['blacklist'] = [re.compile(word.lower()) for word in self._read_lines('blacklist')]

        return config

    @staticmethod
    def _read_lines(filename):
        """
        _read_lines: return the stripped, non-empty lines of `filename`, or an
        empty list when the file cannot be opened or read.
        """
        try:
            with open(filename, 'r') as f:
                stripped = [line.strip() for line in f]
        except IOError:
            return []
        return [line for line in stripped if line]
if __name__ == "__main__":
    # Command-line entry point: parse the options and let Twitter() do the
    # fetching and displaying (all the work happens in its constructor).
    parser = OptionParser("usage: %prog [options]") # no args this time
    parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False, help="set debug mode = True")
    # -m switches marking OFF: mark_read defaults to True and the flag stores False
    parser.add_option("-m", "--mark_read", dest="mark_read", action="store_false", default=True, help="Don't mark displayed tweets as read")
    # help text now lists 'inf', which the sort option also accepts
    parser.add_option("-s", "--sort", dest="sort", action="store", default='time', help="Sort by time, antitime, rel, inf")
    # type="int" makes optparse reject a non-numeric value with a usage error
    # instead of a ValueError traceback later on
    parser.add_option("-n", "--num", dest="num", action="store", type="int", default=10, help="number of tweets to retrieve")
    # parser.add_option("-t", "--topic", dest="topic", action="store", default=None, help="show one topic only")

    (options, args) = parser.parse_args()

    t = Twitter(options)