Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UseGalaxy Help Forum to Matrix channel Bot #4603

Merged
merged 8 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/scrna-galaxyhelp-matrix.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: "[Cron] Send Single-cell Galaxy Help Topics to Matrix CoP Room"

on:
schedule:
# * is a special character in YAML so you have to quote this string
# We'll run this every monday at noon
- cron: '0 12 * * 1'
workflow_dispatch:

jobs:
runner-job:
if: github.repository_owner == 'galaxyproject'
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0

# BEGIN Dependencies
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y coreutils curl libxml2-utils gawk sed grep jq wget
# END Dependencies

- name: Send announcements to single-cell matrix for unanswered posts
run: |
wget https://raw.githubusercontent.com/galaxyproject/training-material/main/bin/galaxy-help-news.sh
HTML_TYPE="bullets" MAX_REPLIES="0" WANTED_TAGS="scrna scrna-seq" ROOM_ID='!TJRLNvfcbWbSRoUNpl:matrix.org' bash ./galaxy-help-news.sh
env:
MATRIX_ACCESS_TOKEN: ${{ secrets.matrix_access_token }}
190 changes: 190 additions & 0 deletions bin/galaxy-help-news.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#!/bin/bash

MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}
ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'} ## GTN Single Cell Maintainers
WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}
MAX_REPLIES=${MAX_REPLIES:-1}
HTML_TYPE=${HTML_TYPE:-"bullets"} ## "table"

## Result filters
OPTS=${OPTS:-"?ascending=true&order=activity"}

if [ -z "$MATRIX_ACCESS_TOKEN" ]; then
echo "
This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
a Room for topics that have less than X replies. Run this maybe once a month.

Example Usage:

MATRIX_ACCESS_TOKEN='123_123_123' \\
MATRIX_SERVER='https://matrix.org' \\
ROOM_ID='!123_132_123:matrix.org' \\
WANTED_TAGS='tag1 tag2' \\
MAX_REPLIES=1 \\
HTML_TYPE='bullets' \\
bash $0

Where:
MATRIX_ACCESS_TOKEN Can be found in your Matrix profile under
'All settings' -> 'Help & About' -> 'Access Token'

MATRIX_SERVER The name or base address of the Matrix server to
post to. Default is '$MATRIX_SERVER'

ROOM_ID The Room ID can be found in the URL of the room
usually following format '!123123123:matrix.org'.
Default is '$ROOM_ID'
NOTE: Single quotes are very important here.

WANTED_TAGS A space separated list of valid tags to find posts
at https://help.galaxyproject.org/
Default is \"$WANTED_TAGS\"

MAX_REPLIES Filter for posts that have less than or equal to
this many replies. Default is \"$MAX_REPLIES\"

HTML_TYPE Render either a 'table' or 'bullets'. HTML tables
look great in the browser but don't render well on
mobile. Default is \"$HTML_TYPE\"

OPTS Extra arguments to append to help.galaxyproject.org
URL. Default is \"$OPTS\"
" >&2
exit 255
fi

function tag_to_tsv {
## For a given TAG, fetch from the help forum, extract and parse
## the table and produce a 4-column TSV output of Link, Title,
## Replies, Views.
##
## TODO: Add date too?
local tag="$1"
curl -s "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
| xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
| sed -r 's|<span class[^>]+>||; s|</span>||; s|\s*<a.*href=\"([^\"]*)\" [^>]+>([^<]+)<.*|_ROW_\1\t\2|' \
| tr '\n' '\t' \
| sed -r 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
}

function alltags_to_tsv {
## For all wanted tags, populate a 4-column TSV output of Link,
## Title, Replies, Views, and return the path of the table.
local fetch_tags=$WANTED_TAGS
local tmp_tsv
tmp_tsv=$(mktemp --suffix=".tsv")
for tag in ${fetch_tags}; do
tag_to_tsv "$tag" >> "$tmp_tsv";
done
## No duplicates, no blanks, no duplicate delimiters,
## and sort by ascending reply count
grep -v "^\s*$" "${tmp_tsv}" | sed 's|\t\t|\t|g' \
| sort | uniq | sort -t $'\t' -nk 3 > "${tmp_tsv}".temp
echo -e "Link\tTitle\tReplies\tViews" > "${tmp_tsv}"
cat "${tmp_tsv}".temp >> "${tmp_tsv}"
rm "${tmp_tsv}".temp
echo "${tmp_tsv}"
}

function filter_tsv {
## Filter a TSV file for maximum replies and then return the path
## of the new filtered table
local tsv="$1"
local tmp_tsv
tmp_tsv=$(mktemp --suffix=".tsv")
awk -F$'\t' -v replies="$MAX_REPLIES" '$3 <= replies' "${tsv}" > "${tmp_tsv}"
echo "${tmp_tsv}"
}

function tsv_to_html {
## Convert a TSV table into HTML text that can be fed into a JSON
local tsv="$1"
if [ "$HTML_TYPE" = "table" ]; then
awk -F$'\t' -v subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies &le; ${MAX_REPLIES}" '\
BEGIN { print "<h1>Updates from Galaxy Help</h1>"subtitle"\n<table>\n<thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead>\n<tbody>"} \
END { print "</tbody>\n</table>"} \
NR > 0 {print "<tr><td><a href=\""$1"\">"$2"</a></td><td>"$3"</td><td>"$4"</td></tr>"}' \
"${tsv}" | tr '\n' ' ' | sed 's|"|\\"|g'
else ## bullets
awk -F$'\t' -v subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies &le; ${MAX_REPLIES}" '\
BEGIN { print "<h1>Updates from Galaxy Help</h1><br/><p>"subtitle"</p><ol>\n"} \
END { print "\n</ol>"} \
NR > 0 {print "<li><a href=\""$1"\">"$2"</a><ul><li>Replies: "$3" and Views: "$4"</li></ul></li>"}' \
"${tsv}" | tr '\n' ' ' | sed 's|"|\\"|g'
fi
}

function tsv_to_markdown {
## Convert a TSV table into Markdown text that can be fed into a JSON
local tsv="$1"
awk -F$'\t' -v subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}" '\
BEGIN { print "## Updates from Galaxy Help\\n***"subtitle"***\\n"} \
NR > 0 {print "* ["$2"]("$1")\\n * "$3" replies and "$4" views\\n"}' \
"${tsv}" | tr '\n' ' ' | sed 's|"|\\"|g'
}

function md_and_html_to_json {
## Stuff the Markdown and HTML text content into a JSON.
local md_text="$1"
local html_text="$2"
local tmp_json;
tmp_json=$(mktemp --suffix=".json")
## See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
## "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
echo "{\
\"msgtype\":\"m.notice\", \
\"format\":\"org.matrix.custom.html\", \
\"body\": \"${md_text}\", \
\"formatted_body\": \"${html_text}\"}" > "${tmp_json}"
echo "${tmp_json}"
}

function post_json_to_matrix {
local json_file="$1"
local txnid post_url
txnid=$(date "+%Y%m%d%H%M${RANDOM:1:3}") ## date-specific transaction ID
MATRIX_SERVER=${MATRIX_SERVER%/} ## remove trailing slash, if any
## Build curl
post_url="${MATRIX_SERVER}/_matrix/client/r0/rooms/"
post_url="${post_url}"${ROOM_ID}"/send/m.room.message/${txnid}"
post_url="${post_url}?access_token=${MATRIX_ACCESS_TOKEN}"
## DEBUG:
## - curl "$post_url" -X PUT --data '{"msgtype":"m.text","body":"hello"}'
curl "$post_url" -X PUT --data "$(cat ${json_file})"
}

function sanity_check {
## Assert that required binaries are in PATH
local required_progs=( cat curl xmllint awk sed grep tr jq )
local miss=""
for prog in "${required_progs[@]}"; do
if ! which "${prog}" 2>/dev/null >&2; then
miss="$miss $prog"
fi
done
if [ "$miss" != "" ]; then
echo "Cannot run without:$miss"
exit 255
fi
}

## MAIN ##
sanity_check

main_tsv=$(filter_tsv "$(alltags_to_tsv)" )
if [[ $(wc -l < "${main_tsv}") == 0 ]]; then
echo "Nothing new to post, aborting." >&2
exit 0
fi

main_mdwn_text=$(tsv_to_markdown "${main_tsv}")
main_html_text=$(tsv_to_html "${main_tsv}")

main_json_file=$(md_and_html_to_json "${main_mdwn_text}" "${main_html_text}")
if ! jq < "${main_json_file}" 2> /dev/null >&2; then
echo "This is not a valid JSON, aborting." >&2
echo "See: ${main_json_file}" >&2
exit 255
fi

post_json_to_matrix "${main_json_file}"