diff --git a/.env-example b/.env-example index 4d29238..6d55022 100644 --- a/.env-example +++ b/.env-example @@ -3,3 +3,13 @@ api_key=xoxb-something default_channel=dev_test payment_key="If you're going to work with ping_payment, you'll need to fill this out." payment_value="Otherwise, don't worry about it." + +enable_blossom=false +blossom_email="" +blossom_password="" +blossom_api_key="" +blossom_api_url="https://grafeas.org" + +QUEUE_POST_TIMEOUT=18 +DEFAULT_CTQ_DURATION=12 +MAX_GRAPH_ENTRIES=10 diff --git a/bubbles/commands/ctq_graphs.py b/bubbles/commands/ctq_graphs.py new file mode 100644 index 0000000..7f3b06e --- /dev/null +++ b/bubbles/commands/ctq_graphs.py @@ -0,0 +1,774 @@ +"""Generation of graphs for the !ctqstats command.""" +import re +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional, Tuple + +from matplotlib import pyplot as plt + +from bubbles.commands.ctq_utils import ( + CLAIMED_COLOR, + COMPLETED_COLOR, + FIGURE_DPI, + MAX_GRAPH_ENTRIES, + PRIMARY_COLOR, + QUEUE_POST_TIMEOUT, + SECONDARY_COLOR, + TEXT_COLOR, + UNCLAIMED_COLOR, + _convert_blossom_date, + _format_hour_duration, + _get_rank, + _reformat_figure, +) + +header_regex = re.compile( + r"^\s*\*(?P<format>\w+)\s*Transcription:?(?:\s*(?P<type>[^\n*]+))?\*", re.IGNORECASE +) + +# If one of the words on the right is included in the post type, +# take the word on the left as post type. 
+post_type_simplification_map = { + "Twitter": ["Twitter"], + "Facebook": ["Facebook"], + "Tumblr": ["Tumblr"], + "Reddit": ["Reddit"], + "Picture": ["Picture", "Photo"], + "Review": ["Review"], + "YouTube": ["YouTube", "You Tube"], + "Code": ["Code", "Program"], + "Chat": ["Chat", "Message", "Discord", "Email", "E-Mail"], + "Meme": ["Meme"], + "Comic": ["Comic"], + "Social Media": ["Social Media"], + "Image": ["Image"], + "Video": ["Video"], + "Text": ["Text"], +} + + +# Transcription template for a bar chart +BAR_CHART_TRANSCRIPTION = """### {title} + +\\[*A horizontal bar chart, showing "{x_label}" on the x-axis in {primary_color} \ +bars for each "{y_label}" on the y-axis:*] + +{entry_list}""" + +BAR_CHART_ENTRY = "- **{label}**: {value}" +BAR_CHART_SPECIAL_ENTRY = "- **{label}** \\[*in {secondary_color}*]: {value}" + +# Transcription template for a scatter plot +SCATTER_PLOT_TRANSCRIPTION = """### {title} + +\\[*A scatter plot, showing "{x_label}" on the x axis and "{y_label}" on the y axis.*] + +""" + + +def _get_username(post: Dict) -> str: + """Get the username for the given post.""" + return "u/" + post["user"]["username"] + + +def _get_subreddit_name(post: Dict) -> str: + """Get the subreddit name for the given post.""" + return "r/" + post["url"].split("/")[4] + + +def _get_transcription_characters(post: Dict) -> int: + """Get the number of characters in the transcription for the given post.""" + return len(post["transcription"]["text"]) + + +def _get_transcription_words(post: Dict) -> int: + """Get the number of words in the transcription for the given post.""" + return len(post["transcription"]["text"].split()) + + +def _escape_reddit_formatting(text: Any) -> str: + """Escape Reddit formatting.""" + return ( + str(text) + .replace("u/", r"u\/") + .replace("r/", r"r\/") + .replace("_", r"\_") + .replace("*", r"\*") + ) + + +def _get_post_format_and_type(post: Dict) -> Tuple[str, Optional[str]]: + """Determine the type of the post.""" + text: str = 
post["transcription"]["text"] + header = text.split("---")[0] + + match = header_regex.search(header) + if match is None: + print(f"Unrecognized post type: {header}") + return "Post", "Post" + + tr_format = match.group("format") + if tr_format: + tr_format = tr_format.strip() + tr_type = match.group("type") + if tr_type: + tr_type = tr_type.strip() + + return tr_format, tr_type + + +def _get_simplified_post_type(post: Dict) -> str: + """Get a simplified post type, grouping together multiple types.""" + post_format, post_type = _get_post_format_and_type(post) + post_type = post_type or post_format + + # Simplify the post type into common groups + for simple_type, words in post_type_simplification_map.items(): + for word in words: + if word.casefold() in post_type.casefold(): + return simple_type + + return post_type + + +def _get_event_stream(submissions: List[Dict]) -> List[Tuple[str, datetime]]: + """Get a list of events from the submissions. + + Each event is a tuple (,