html_to_markdown function

master
Bel LaPointe 2021-12-14 21:50:35 -07:00
parent 0d37159afb
commit 1fc54a4380
1 changed file with 12 additions and 8 deletions

20
poc.sh
View File

@ -29,19 +29,23 @@ original() {
for url in $(foo | grep -o 'https:..www.dndbeyond.com.compendium[^"]*' | sed 's/#.*//' | sort -u); do for url in $(foo | grep -o 'https:..www.dndbeyond.com.compendium[^"]*' | sed 's/#.*//' | sort -u); do
echo $url echo $url
foo $url \ foo $url \
| pandoc -f html -t markdown --ascii --toc --wrap=none --strip-comments -o - \ | html_to_markdown \
| sed '/^:::.*/d' \
| sed -e '/^# Chapter/p' -e '0,/^# Chapter/d' \
| sed -e '/^Share$/,$d' \
| sed 's/^\(\#\#*\) \[](#\([^)]*\))/\1 /' \
| sed 's/{#\([^ ]*\)[^}]*}/\n\n<a name="\1"><\/a>/' \
| sed 's/\[\([^]]*\)]\(([^)]*)\)*\({[^}]*}\)*/\1/g' \
| sed "s/\\\\\([\"']\)/\1/g" \
| less || notes-server -root $d | less || notes-server -root $d
break break
done done
} }
#######################################
# Convert HTML to trimmed-down Markdown.
#
# Reads HTML on stdin, converts it with pandoc, then cleans the Markdown
# with a chain of sed filters, in this order:
#   1. Drop every line that starts with ":::".
#   2. Keep only the text from the first line starting with "# Chapter"
#      through the end of input. Each line is printed exactly once (the
#      previous 'p' + '0,/re/d' form printed every later "# Chapter"
#      line twice). If no such line exists, nothing is emitted.
#   3. Drop the first line that is exactly "Share" and everything after it.
#   4. Remove an empty "[](#...)" link that directly follows the leading
#      heading hashes.
#   5. Replace the first "{#id ...}" attribute block on a line with two
#      newlines followed by <a name="id"></a>.
#   6. Replace each "[text]" plus any trailing "(...)" and "{...}" groups
#      with just "text".
#   7. Remove the backslash in front of escaped double/single quotes.
#
# Arguments: none
# Outputs:   cleaned Markdown on stdout
# Requires:  pandoc on PATH. NOTE(review): step 5 relies on "\n" being
#            accepted in a sed replacement, which is a GNU sed feature -
#            confirm the target platform.
#######################################
html_to_markdown() {
  pandoc -f html -t markdown --ascii --toc --wrap=none --strip-comments -o - \
    | sed '/^:::.*/d' \
    | sed -n '/^# Chapter/,$p' \
    | sed -e '/^Share$/,$d' \
    | sed 's/^\(\#\#*\) \[](#\([^)]*\))/\1 /' \
    | sed 's/{#\([^ ]*\)[^}]*}/\n\n<a name="\1"><\/a>/' \
    | sed 's/\[\([^]]*\)]\(([^)]*)\)*\({[^}]*}\)*/\1/g' \
    | sed "s/\\\\\([\"']\)/\1/g"
}
if [ "$0" == "$BASH_SOURCE" ]; then if [ "$0" == "$BASH_SOURCE" ]; then
main "$@" main "$@"
fi fi