From 1fc54a4380cf09f50e20b496f1aecfe419c8a495 Mon Sep 17 00:00:00 2001
From: Bel LaPointe
Date: Tue, 14 Dec 2021 21:50:35 -0700
Subject: [PATCH] html_to_markdown function

---
Note: moves the pandoc + sed clean-up pipeline out of original() into a
standalone html_to_markdown function (HTML on stdin, markdown on stdout);
the pipeline stages themselves are unchanged. Leading whitespace in this
patch was reconstructed, so apply with --ignore-whitespace if the context
lines do not match.

 poc.sh | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/poc.sh b/poc.sh
index 0541aaa..72d35be 100644
--- a/poc.sh
+++ b/poc.sh
@@ -29,19 +29,23 @@ original() {
     for url in $(foo | grep -o 'https:..www.dndbeyond.com.compendium[^"]*' | sed 's/#.*//' | sort -u); do
         echo $url
         foo $url \
-        | pandoc -f html -t markdown --ascii --toc --wrap=none --strip-comments -o - \
-        | sed '/^:::.*/d' \
-        | sed -e '/^# Chapter/p' -e '0,/^# Chapter/d' \
-        | sed -e '/^Share$/,$d' \
-        | sed 's/^\(\#\#*\) \[](#\([^)]*\))/\1 /' \
-        | sed 's/{#\([^ ]*\)[^}]*}/\n\n<\/a>/' \
-        | sed 's/\[\([^]]*\)]\(([^)]*)\)*\({[^}]*}\)*/\1/g' \
-        | sed "s/\\\\\([\"']\)/\1/g" \
+        | html_to_markdown \
         | less || notes-server -root $d
         break
     done
 }
 
+html_to_markdown() {
+    pandoc -f html -t markdown --ascii --toc --wrap=none --strip-comments -o - \
+    | sed '/^:::.*/d' \
+    | sed -e '/^# Chapter/p' -e '0,/^# Chapter/d' \
+    | sed -e '/^Share$/,$d' \
+    | sed 's/^\(\#\#*\) \[](#\([^)]*\))/\1 /' \
+    | sed 's/{#\([^ ]*\)[^}]*}/\n\n<\/a>/' \
+    | sed 's/\[\([^]]*\)]\(([^)]*)\)*\({[^}]*}\)*/\1/g' \
+    | sed "s/\\\\\([\"']\)/\1/g"
+}
+
 if [ "$0" == "$BASH_SOURCE" ]; then
     main "$@"
 fi