Skip to content

Commit

Permalink
Make new topic history graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
mas-4 committed Jul 13, 2024
1 parent 36f3536 commit 0b6aab8
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 63 deletions.
1 change: 1 addition & 0 deletions app/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
def gen_plots(dh: DataHandler):
Plots.sentiment_graphs(dh.all_sentiment_data)
Plots.topic_history_bar(dh.topic_df.copy())
Plots.topic_history_stacked_area(dh.topic_df.copy())
Plots.topic_today_bubble(dh.topic_df.copy())
Plots.topic_today_bar(dh.topic_df.copy())
Plots.individual_topic(dh.topic_df.copy(), dh.topics)
Expand Down
1 change: 1 addition & 0 deletions app/site/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class FileNames:
main_wordcloud = 'wordcloud.png'
sentiment_graphs = 'sentiment-graphs.png'
topic_history_bar_graph = 'topic_history_bar_graph.png'
topic_history_stacked_area = 'topic_history_stacked_area.png'
topic_today_bubble_graph = 'topic_today_bubble_graph.png'
topic_today_bar_graph = 'topic_today_bar_graph.png'
agency_distribution = 'agency_distribution.png'
Expand Down
4 changes: 4 additions & 0 deletions app/site/data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import time
from datetime import timedelta as td
from enum import Enum
Expand Down Expand Up @@ -205,6 +206,9 @@ def get_main_headline_df():
df = pd.DataFrame(data, columns=list(cols.keys()))
logger.info("Queried %i headlines for main df.", len(df))

if len(df) == 0:
sys.exit("No headlines found. Exiting.")

df['first_accessed'] = df['first_accessed'].dt.tz_localize('utc').dt.tz_convert('US/Eastern')
df['last_accessed'] = df['last_accessed'].dt.tz_localize('utc').dt.tz_convert('US/Eastern')
roundcol = ['vader_compound', 'afinn', 'topic_score']
Expand Down
49 changes: 41 additions & 8 deletions app/site/graphing.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,35 +164,68 @@ def topic_history_bar(df: pd.DataFrame):
for i, topic in enumerate(sorted_topics.index):
topic_df = df[df['topic'] == topic].copy()
topic_df['day'] = topic_df['first_accessed'].dt.date
# Drop everything before 30 days ago
topic_df = topic_df[topic_df['day'] > dt.now().date() - td(days=30)]

# Group by day and calculate number of articles
topic_df = topic_df.groupby('day').agg({'afinn': 'count'})
topic_df = topic_df.rename(columns={'afinn': 'articles'})
if len(topic_df) < len(bottom):
topic_df = topic_df.reindex(bottom.index, fill_value=0)
ax.bar(topic_df.index, topic_df.articles, label=topic, bottom=bottom['bot'], color=topic_colors[topic],
edgecolor='black')
edgecolor='black', width=1)
bottom['bot'] += topic_df.articles

ax.set_xlim(bottom.index[0] - td(days=1), bottom.index[-1] + td(days=1))
# Set xlim to 30 days ago to today
ax.set_xlim(dt.now().date() - td(days=30), dt.now().date())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
ax.set_xticks(ax.get_xticks()[::2])
ax.set_xticklabels(ax.get_xticklabels(), rotation=rotation)

handles, labels = ax.get_legend_handles_labels()
# Put the legend above the graph, arranged horizontally
ax.legend(handles[::-1], labels[::-1], loc='lower center', bbox_to_anchor=(0.5, 1.04), ncol=5, frameon=True,
facecolor='lightgray', edgecolor='black', framealpha=0.9, fontsize='medium', title_fontsize='large',
fancybox=True, shadow=True, borderpad=1.2, labelspacing=1.5)


for spine in ['right', 'top', 'left', 'bottom']:
ax.spines[spine].set_visible(False)
apply_special_dates(ax, 'all') # Assume this function is defined elsewhere
# apply_special_dates(ax, 'all') # Assume this function is defined elsewhere
plt.tight_layout()
plt.savefig(PathHandler(
PathHandler.FileNames.topic_history_bar_graph).build) # Assume PathHandler is defined elsewhere

@classmethod
def topic_history_stacked_area(cls, df: pd.DataFrame):
    """Render a stacked-area chart of daily article counts per topic.

    Counts headlines per (day, topic), smooths each topic's daily series
    with a rolling mean, stacks the topics ordered by total volume, and
    saves the figure to the site's static assets.

    :param df: headline DataFrame with at least 'topic', 'first_accessed',
        and 'afinn' columns ('afinn' is used only as a non-null count proxy
        — TODO confirm it is never NaN for counted rows).
    """
    window = 7  # days in the moving average
    fig, ax = plt.subplots()
    fig.subplots_adjust(bottom=0.2, top=0.8)
    fig.set_size_inches(13, 8)
    # Copy after filtering: assigning the 'day' column below on a
    # boolean-indexed slice raises SettingWithCopyWarning (and fails
    # under pandas copy-on-write) without an explicit copy.
    df = df[df['topic'] != ''].copy()
    # Count articles per topic per day.
    df['day'] = df['first_accessed'].dt.date
    df = df.groupby(['day', 'topic']).agg({'afinn': 'count'}).unstack().fillna(0)
    df.columns = df.columns.droplevel()  # flatten ('afinn', topic) -> topic
    df = df.reindex(sorted(df.columns), axis=1)
    # Smooth with a rolling mean so day-to-day noise doesn't dominate.
    df = df.rolling(window=window).mean()
    # Order columns by total volume so the largest topics stack first.
    df = df[df.sum().sort_values(ascending=False).index]
    # The first window-1 rows are NaN from the rolling mean; drop them.
    df = df.dropna()

    ax.stackplot(df.index, df.values.T, labels=df.columns,
                 colors=[topic_colors[topic] for topic in df.columns])
    ax.yaxis.set_visible(False)
    ax.set_title(f"Which topics are being covered? ({window}-day moving average)")

    for spine in ['right', 'top', 'left', 'bottom']:
        ax.spines[spine].set_visible(False)
    handles, labels = ax.get_legend_handles_labels()
    # Put the legend above the graph, arranged horizontally.
    ax.legend(handles[::-1], labels[::-1], loc='lower center', bbox_to_anchor=(0.5, 1.04), ncol=5, frameon=True,
              facecolor='lightgray', edgecolor='black', framealpha=0.9, fontsize='medium',
              title_fontsize='large',
              fancybox=True, shadow=True, borderpad=1.2, labelspacing=1.5)
    apply_special_dates(ax, 'all')
    plt.tight_layout()
    plt.savefig(PathHandler(PathHandler.FileNames.topic_history_stacked_area).build)

@classmethod
def individual_topic(cls, df, topics):
# blue to red
Expand Down
1 change: 1 addition & 0 deletions app/site/templates/topics.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<img src="{{ FileNames.topic_today_bar_graph }}" alt="Graphs">
<img src="{{ FileNames.topic_today_bubble_graph }}" alt="Graphs">
<img src="{{ FileNames.topic_history_bar_graph }}" alt="Graphs">
<img src="{{ FileNames.topic_history_stacked_area }}" alt="Graphs">
</div>
<ol id="date-key">
{% for date in dates %}
Expand Down
305 changes: 250 additions & 55 deletions dev/research/plotting.ipynb

Large diffs are not rendered by default.

0 comments on commit 0b6aab8

Please sign in to comment.