Skip to content

Commit

Permalink
UI: A bare-bones list of feeds available
Browse files Browse the repository at this point in the history
Also introduces a users table and "templates".
  • Loading branch information
bsravanin committed Sep 4, 2019
1 parent c158660 commit 4dfc33c
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
*.swp
*.xml
__pycache__
feeds
6 changes: 6 additions & 0 deletions twitterss/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ class Config(object):
# Path to directory containing feeds.
FEED_ROOT_PATH = os.path.join(_root, 'feeds')

# Corresponding URL root.
FEED_ROOT_URL = 'https://bsravan.in/feeds'

# Path to HTML keeping a list of all RSS feeds.
FEED_LIST_HTML = os.path.join(_root, 'feeds', 'feeds.html')

# Delete tweets older than these seconds if they have already been used in RSS feeds.
DELETE_TWEETS_OLDER_THAN_SECONDS = 86400 * 7

Expand Down
54 changes: 33 additions & 21 deletions twitterss/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@
STATUS_TABLE = 'statuses'
# Column that stores the epoch time at which a row was last used for an RSS update.
RSS_COLUMN = 'rss_update'
# Column name -> SQLite column definition. Built from a list of pairs so that
# the column order is explicit and does not depend on dict-literal ordering.
STATUS_COLUMNS = OrderedDict([
    ('id', 'INTEGER PRIMARY KEY'),
    ('tweet_json', 'TEXT'),
    (RSS_COLUMN, 'INTEGER'),
])
# Columns of the status table that get their own index.
STATUS_INDICES = [RSS_COLUMN]

USER_TABLE = 'users'
# Order matters: rows are written with positional values in this column order.
USER_COLUMNS = OrderedDict([
    ('username', 'TEXT PRIMARY KEY'),
    ('display_name', 'TEXT'),
    (RSS_COLUMN, 'INTEGER'),
])


def _get_conn(read_only: bool = True) -> sqlite3.Connection:
"""Get a connection to the DB."""
Expand All @@ -32,26 +39,17 @@ def _get_conn(read_only: bool = True) -> sqlite3.Connection:
return sqlite3.connect(Config.DB_PATH, isolation_level=None)


def _create_table(conn: sqlite3.Connection, table: str, columns: OrderedDict):
    """Create ``table`` in the DB if it does not already exist.

    Args:
        conn: Open SQLite connection on which the statement is executed.
        table: Name of the table to create.
        columns: Ordered mapping of column name to its SQLite column
            definition (e.g. ``'INTEGER PRIMARY KEY'``). Columns are declared
            in iteration order.

    Table and column names are interpolated directly into the SQL text, so
    they must come from trusted constants, never from external input.
    """
    schema = ', '.join('{} {}'.format(name, definition) for name, definition in columns.items())
    conn.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(table, schema))


def create_schema():
"""Create the full DB schema. Idempotent."""
with _get_conn(read_only=False) as conn:
_create_table(conn, STATUS_TABLE, STATUS_COLUMNS)
_create_table(conn, USER_TABLE, USER_COLUMNS)

for col_name in STATUS_INDICES:
index_name = '{}_{}'.format(STATUS_TABLE, col_name)
Expand Down Expand Up @@ -95,17 +93,31 @@ def get_tweets_to_rss_feed():
return tweets


def mark_tweets_as_rss_fed(username: str, display_name: str, status_ids: List[int]):
    """Record that tweets were fed into a user's RSS feed and prune old tweets.

    Doing this bookkeeping is what makes it possible to periodically delete
    old data.

    Args:
        username: Twitter screen name; stored lower-cased in the user table.
        display_name: Human-readable name stored alongside the username.
        status_ids: IDs of the status rows that were just written to the feed.
            Nothing is done when this is empty.

    Side effects (all on one connection):
        * sets ``RSS_COLUMN`` of the given statuses to the current epoch time,
        * inserts or replaces the user's row with that same time,
        * deletes statuses whose ``RSS_COLUMN`` is set and older than
          ``Config.DELETE_TWEETS_OLDER_THAN_SECONDS``.
    """
    if not status_ids:
        return

    update_time = int(time.time())
    max_rss_time = update_time - Config.DELETE_TWEETS_OLDER_THAN_SECONDS
    status_placeholders = ', '.join('?' * len(status_ids))
    # Double quotes are SQL's identifier quoting; single quotes denote string
    # literals and are only tolerated as identifiers by SQLite for compatibility.
    user_col_names = ', '.join('"{}"'.format(key) for key in USER_COLUMNS)
    user_placeholders = ', '.join('?' * len(USER_COLUMNS))

    with _get_conn(read_only=False) as conn:
        conn.execute(
            'UPDATE {} SET {} = ? WHERE id IN ({})'.format(STATUS_TABLE, RSS_COLUMN, status_placeholders),
            [update_time, *status_ids])

        # Values are positional, so they must follow the USER_COLUMNS order.
        conn.execute(
            'REPLACE INTO {} ({}) VALUES ({})'.format(USER_TABLE, user_col_names, user_placeholders),
            [username.lower(), display_name, update_time])

        # Also delete old enough data while we are at it.
        conn.execute(
            'DELETE FROM {} WHERE {} > 0 AND {} < ?'.format(STATUS_TABLE, RSS_COLUMN, RSS_COLUMN),
            [max_rss_time])


def get_all_users() -> List[tuple]:
    """Fetch every row of the user table, sorted by username.

    Returns:
        A list of ``(username, display_name, rss_update)`` tuples.
    """
    query = 'SELECT * FROM {} ORDER BY username'.format(USER_TABLE)
    with _get_conn() as conn:
        # Row objects allow the columns to be looked up by name below.
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute(query)
        users = []
        for row in cursor.fetchall():
            users.append((row['username'], row['display_name'], row[RSS_COLUMN]))
        return users
83 changes: 49 additions & 34 deletions twitterss/rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
from twitterss import db
from twitterss.config import Config


# Directory named 'templates' located next to this module.
TEMPLATES_ROOT = os.path.join(os.path.dirname(__file__), 'templates')
# Template read by get_feed() when a feed file does not exist yet.
CHANNEL_XML_TEMPLATE = os.path.join(TEMPLATES_ROOT, 'channel.xml')
# Template read by _update_feeds_html() to render the list of feeds.
FEEDS_HTML_TEMPLATE = os.path.join(TEMPLATES_ROOT, 'feeds.html')
HEADER = '''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:atom="http://www.w3.org/2005/Atom"
Expand All @@ -41,39 +45,33 @@ def _get_tweet_url(username: str, tid: int) -> str:
return 'https://twitter.com/{}/status/{}'.format(username, tid)


def _get_feed_name(username: str) -> str:
    """Return the feed file name for ``username``: lower-cased name plus ``_rss.xml``."""
    return username.lower() + '_rss.xml'


def _get_feed_url(username: str) -> str:
    """Return the feed URL for ``username``: its feed file name under ``Config.FEED_ROOT_URL``."""
    root_url = Config.FEED_ROOT_URL
    feed_name = _get_feed_name(username)
    return '/'.join((root_url, feed_name))


def get_feed(feed_path: str, username: str, profile_image_url: str) -> ElementTree.ElementTree:
    """Return the RSS feed of a user as an XML tree, initializing it if necessary.

    Args:
        feed_path: Path of the user's feed file. If it exists it is parsed
            and returned as is.
        username: Twitter screen name the feed belongs to.
        profile_image_url: URL of the user's profile image, used as the
            channel image of a newly created feed.

    Returns:
        The parsed existing feed, or a new tree rendered from the channel
        template. The template is filled through ``str.format`` with the
        fields ``username``, ``feed_url``, ``user_url`` and
        ``profile_image_url``.
    """
    if os.path.isfile(feed_path):
        return ElementTree.parse(feed_path)

    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def _xml_safe(value: str) -> str:
        # Values end up in element text and attribute values; an unescaped
        # '&', '<' or '"' (e.g. in a URL query string) would make the
        # rendered document unparseable.
        return escape(value, {'"': '&quot;'})

    with open(CHANNEL_XML_TEMPLATE, encoding='utf-8') as cfd:
        channel_xml = cfd.read()
    root_str = channel_xml.format(
        username=_xml_safe(username),
        feed_url=_xml_safe(_get_feed_url(username)),
        user_url=_xml_safe(_get_user_url(username)),
        profile_image_url=_xml_safe(profile_image_url))
    return ElementTree.ElementTree(ElementTree.fromstring(root_str))


def _rss_timeformat(epoch: int) -> str:
    """Format a Unix timestamp as an RSS-style date string in UTC.

    The result looks like "Mon, 30 Sep 2002 01:56:02 UTC".

    Args:
        epoch: Seconds since the Unix epoch.

    Returns:
        The formatted timestamp. The conversion is done explicitly in UTC so
        the value matches the literal "UTC" suffix regardless of the local
        timezone of the machine.
    """
    # Local import keeps this block self-contained.
    from datetime import timezone

    return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime('%a, %d %b %Y %H:%M:%S UTC')


def _rss_time_now() -> str:
    """Return the current UTC time formatted like "Mon, 30 Sep 2002 01:56:02 UTC"."""
    # Local import keeps this block self-contained. An aware "now" in UTC
    # replaces datetime.utcnow(), which is deprecated in recent Python versions.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S UTC')


class EnhancedTweet(object):
Expand All @@ -100,7 +98,7 @@ def get_rss_item(self) -> ElementTree.Element:
<description />
<xmlns_content_encoded>RSS_ITEM_PLACE_HOLDER</xmlns_content_encoded>
</item>'''.format(display_name=self.display_name, id=self.id, url=self.url,
pub_date=_rss_timeformat(datetime.fromtimestamp(self.inner.created_at_in_seconds)))
pub_date=_rss_timeformat(self.inner.created_at_in_seconds))
item = base_item.replace('RSS_ITEM_PLACE_HOLDER', self.get_content())
try:
return ElementTree.fromstring(item)
Expand Down Expand Up @@ -207,7 +205,7 @@ def _get_namespace_handled_xml(rss: ElementTree.Element) -> str:


def _update_feed(username: str, tweets: List[Status]):
feed_path = os.path.join(Config.FEED_ROOT_PATH, '{}_rss.xml'.format(username).lower())
feed_path = os.path.join(Config.FEED_ROOT_PATH, _get_feed_name(username))
profile_image_url = tweets[0].user.profile_image_url_https or 'https://abs.twimg.com/favicons/win8-tile-144.png'
feed = get_feed(feed_path, username, profile_image_url)
rss = feed.getroot()
Expand All @@ -220,12 +218,28 @@ def _update_feed(username: str, tweets: List[Status]):
# TODO: Hard-coded assumption that items start at the 10th place as channel children.
channel.insert(index + 10, EnhancedTweet(tweet).get_rss_item())
for lastBuildDate in channel.iter('lastBuildDate'):
lastBuildDate.text = _rss_timeformat(datetime.utcnow())
lastBuildDate.text = _rss_time_now()
feed_str = _get_namespace_handled_xml(rss)
with open(feed_path, 'w') as xfd:
xfd.write(feed_str)


def _update_feeds_html():
    """Regenerate the HTML page that lists all RSS feeds.

    Reads the feeds HTML template, builds one table row per user returned by
    ``db.get_all_users()`` (display name, Twitter link, feed link, last update
    time) and writes the result to ``Config.FEED_LIST_HTML``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # The template declares charset utf-8, so read and write it as such
    # instead of relying on the platform default encoding.
    with open(FEEDS_HTML_TEMPLATE, encoding='utf-8') as hfd:
        full_html = hfd.read()

    full_trs = []
    for username, display_name, timestamp in db.get_all_users():
        # Display names come from Twitter and are arbitrary text; escape
        # everything interpolated into markup so it cannot inject HTML.
        name_td = '<td>{}</td>'.format(escape(str(display_name)))
        twitter_td = '<td><a href="{}">@{}</a></td>'.format(
            escape(_get_user_url(username)), escape(username))
        feed_td = '<td><a href="{}">{}</a></td>'.format(
            escape(_get_feed_url(username)), escape(_get_feed_name(username)))
        timestamp_td = '<td>{}</td>'.format(_rss_timeformat(timestamp))
        full_trs.append('<tr>{}{}{}{}</tr>'.format(name_td, twitter_td, feed_td, timestamp_td))

    full_html = full_html.replace('PLACEHOLDER', '\n'.join(full_trs))
    with open(Config.FEED_LIST_HTML, 'w', encoding='utf-8') as hfd:
        hfd.write(full_html)


def generate_feeds():
"""Periodically fetch new tweets from the DB and update their corresponding RSS feeds."""
os.makedirs(Config.FEED_ROOT_PATH, exist_ok=True)
Expand All @@ -235,10 +249,11 @@ def generate_feeds():
logging.info('No new tweets in DB. Sleeping %ss.', Config.SLEEP_ON_CATCHING_UP_SECONDS)
time.sleep(Config.SLEEP_ON_CATCHING_UP_SECONDS)
continue
user_to_tweets = defaultdict(list)
username_to_tweets = defaultdict(list)
for tweet in all_new_tweets:
user_to_tweets[tweet.user.screen_name].append(tweet)
for user, tweets in user_to_tweets.items():
logging.info('Updating RSS feed of %s with %s tweets.', user, len(tweets))
_update_feed(user, tweets)
db.mark_tweets_as_rss_fed([tweet.id for tweet in tweets])
username_to_tweets[tweet.user.screen_name].append(tweet)
for username, tweets in username_to_tweets.items():
logging.info('Updating RSS feed of %s with %s tweets.', username, len(tweets))
_update_feed(username, tweets)
db.mark_tweets_as_rss_fed(username, tweets[0].user.name, [tweet.id for tweet in tweets])
_update_feeds_html()
31 changes: 31 additions & 0 deletions twitterss/templates/feeds.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css"
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
<title>RSS feeds generated by TwitteRSS</title>
</head>

<body>
<div class="container-fluid">
<div class="row">
<main role="main" class="col-lg-12 px-4">
<div class="table-responsive">
<table class="table table-striped table-sm">
<thead>
<tr>
<th>User</th><th>Twitter Page</th><th>RSS Feed</th><th>Last Updated</th>
</tr>
</thead>
<tbody>
<!-- The marker token on the next line is substituted with the generated table rows, one per feed.
     Every occurrence of the token in this file is replaced, so it must not appear anywhere else. -->
PLACEHOLDER
</tbody>
</table>
</div>
</main>
</div>
</div>
</body>
</html>

0 comments on commit 4dfc33c

Please sign in to comment.