diff --git a/renderer.sh b/renderer.sh
index da71758..0712efd 100755
--- a/renderer.sh
+++ b/renderer.sh
@@ -9,27 +9,62 @@
##############################################################################
-## may consider having it output as gfm and then having rich do it with -m --hyperlinks
-
+#!/bin/sh
+
+##############################################################################
+#
+# My renderer for newsboat/newsbeuter/mutt
+# (c) Steven Saus 2024
+# Licensed under the MIT license
+#
+##############################################################################
+
+# For reference:
+
+# using pup to clean it up, then to select divs that are supposed to be hidden and storing them in a variable.
+# then taking the input, and removing those divs (yay grep)
+# removing empty divs
+# removing images (seriously, it's a MESS otherwise -- perhaps show those links at the bottom later?)
+# changing em and strong tags to a Unicode marker character (⬞) for highlighting regex matches in neomutt/newsbeuter. (It doesn't work across multiple lines, but hey)
+# (also, regular markdown characters don't work -- noted here so I don't forget AGAIN and try AGAIN)
+# adding a br after each table row so it doesn't become collapsed and we have SOME whitespace
+# considering -nonumbers to clean up the body text, but...
+
if [ $# -eq 0 ]; then
# no arguments passed, use stdin
input=$(cat)
- echo "${input}" | sed -e 's/
]*>//g' | sed -e 's/
]*>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | iconv -t utf-8//TRANSLIT - | elinks -dump -no-references -no-numbering -dump-charset UTF-8 -dump-width 140 | grep -v "READ MORE:"
-# echo "" > /home/steven/reference_article.txt
-# echo $input | sed -e 's/
]*>//g' | sed -e 's/]*>//g' | sed -e 's///g' | hxunent | iconv -t utf-8//TRANSLIT - >> /home/steven/reference_article.txt
-# echo "" >> /home/steven/reference_article.txt
-else
+ antimatch=""
+ antimatch=$(echo "${input}" | pup 'div[style*="display: none;"],div[style*="display:none;"], div[style*="visibility: hidden;"], div[style*="overflow: hidden;"]')
+ if [ "$antimatch" != "" ];then
+ echo " "
+ echo "${input}" | pup | grep -vF "${antimatch}" | sed -e 's/
]*>//g' | sed 's/
![]()
]\+>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | sed -e 's/<\/tr>/<\/tr>
/g'| hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | lynx -dump -stdin -assume_charset=UTF-8 -force_empty_hrefless_a -hiddenlinks=ignore -html5_charsets -dont_wrap_pre -nolist -width=140 -collapse_br_tags | grep -v "READ MORE:"
+ else
+ echo " "
+ echo "${input}" | pup | sed -e 's/]*>//g' | sed 's/
![]()
]\+>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | sed -e 's/<\/tr>/<\/tr>
/g'| hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | lynx -dump -stdin -assume_charset=UTF-8 -force_empty_hrefless_a -hiddenlinks=ignore -html5_charsets -dont_wrap_pre -nolist -width=140 -collapse_br_tags | grep -v "READ MORE:"
+ fi
+ else
# it's a URL, pass to elinks directly
if [ $(echo "${1}" | grep -c http) -gt 0 ];then
- elinks "${1}" -dump -no-references -no-numbering -dump-charset UTF-8 -dump-width 140
+ elinks "${1}" -dump -dump-charset UTF-8 -dump-width 140
exit
fi
# it's a file, parse it this way
if [ -f "${1}" ];then
input=$(cat "${1}")
- echo $input | sed -e 's/
]*>//g' | sed -e 's/]*>//g' | hxclean | hxnormalize -e -L -s 2>/dev/null | tidy -quiet -omit -clean 2>/dev/null | hxunent | iconv -t utf-8//TRANSLIT - | elinks -dump -no-references -no-numbering -dump-charset UTF-8 -dump-width 140
+ # this is to deal with stupid hidden divs in HTML emails
+ # should only invoke when it's there; otherwise it craps out the whole thing.
+ echo "${input}" > /home/steven/tmp/shit.txt
+ antimatch=""
+ antimatch=$(echo "${input}" | pup 'div[style*="display: none;"],div[style*="display:none;"],div[style*="visibility: hidden;"],div[style*="overflow: hidden;"]')
+ if [ "$antimatch" != "" ];then
+ echo " "
+ echo "${input}" | pup | grep -vF "${antimatch}" | sed -e 's/
]*>//g' | sed 's/
![]()
]\+>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | sed -e 's/<\/tr>/<\/tr>
/g' | hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | lynx -dump -stdin -assume_charset=UTF-8 -force_empty_hrefless_a -underscore -hiddenlinks=ignore -html5_charsets -dont_wrap_pre -nolist -width=140 | grep -v "READ MORE:"
+ else
+ echo " "
+ echo "${input}" | pup | sed -e 's/]*>//g' | sed 's/
![]()
]\+>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | sed -e 's/<\/tr>/<\/tr>
/g'| hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | lynx -dump -stdin -assume_charset=UTF-8 -force_empty_hrefless_a -hiddenlinks=ignore -html5_charsets -dont_wrap_pre -nolist -width=140 -collapse_br_tags | grep -v "READ MORE:"
+ fi
fi
-
fi
+
diff --git a/renderer_links.sh b/renderer_links.sh
index efb7512..b40e6f7 100755
--- a/renderer_links.sh
+++ b/renderer_links.sh
@@ -53,7 +53,6 @@ if [ $# -eq 0 ]; then
echo "${input}" | pup | sed -e 's/]*>//g' | sed 's/
![]()
]\+>//g' | sed -e 's///g'| sed -e 's/
]*>/⬞/g' | sed -e 's/<\/em>/⬞/g' | sed -e 's/]*>/⬞/g' | sed -e 's/<\/strong>/⬞/g' | sed -e 's/<\/tr>/<\/tr>
/g'| hxclean | hxnormalize -e -L -s 2>/dev/null | hxunent | lynx -dump -stdin -assume_charset=UTF-8 -force_empty_hrefless_a -hiddenlinks=ignore -html5_charsets -dont_wrap_pre -width=140 -collapse_br_tags | grep -v "READ MORE:"
fi
fi
-
fi