diff --git a/scrape.sh b/scrape.sh index 0975ec0..37f330d 100644 --- a/scrape.sh +++ b/scrape.sh @@ -267,6 +267,7 @@ html_to_markdown() { --wrap=none \ --strip-comments \ -o - \ + | grep -v '^\+[-+]*\+$' \ | sed 's/{[^}]*data[^}]*}//g' \ | sed 's/\[](#[^)]*)//' \ | cat \ @@ -301,9 +302,11 @@ html_to_markdown() { | sed 's/+===/|===/g' \ | sed 's/---+/---|/g' \ | sed 's/+---/|---/g' \ - | grep -v '^|-[-|]*-|$' \ | sed '/^|[=|]*|$/s/=/-/g' \ + #| grep -v '^|-[-|]*-|$' \ + #| sed '/ |\r|-[-|]*-|\r| /s/-/=/g' \ + #| sed 's/\r\r| \([^\r]*\) |\r|[^=]/\r\r| \1 |\r|=============================|\r|/g' \ #| sed 's/:::\(.*\):::/```\1```/g' \ rm "$f"