diff --git a/scrape.sh b/scrape.sh
index 77a63da..0975ec0 100644
--- a/scrape.sh
+++ b/scrape.sh
@@ -238,6 +238,10 @@ html_to_markdown() {
local f="$(mktemp)"
log url=$1
cat > "$f"
+ #| perl -pe 's|
]*stat-block-ability-scores-data[^>]*>(.*?)
|\1|g' \
+ #| perl -pe 's|]*stat-block-ability-scores-heading[^>]*>(.*?)
|\1|g' \
+ #| perl -pe 's|]*stat-block-ability-scores-stat[^>]*>(.*?)
|\1
|g' \
+ #| readability "file://$(realpath $f)" \
local clean="$(
cat "$f" \
@@ -245,16 +249,13 @@ html_to_markdown() {
| tr '\n' '\r' \
| perl -pe 's|]*>(.*?)
|\1|g' \
| perl -pe 's|\r([^<]*)
\r|\r\1
\r|g' \
- | perl -pe 's|]*stat-block-ability-scores-data[^>]*>(.*?)
|\1|g' \
- | perl -pe 's|]*stat-block-ability-scores-heading[^>]*>(.*?)
|\1|g' \
- | perl -pe 's|]*stat-block-ability-scores-stat[^>]*>(.*?)
|\1
|g' \
- | perl -pe 's|]*stat-block-ability-scores[^>]*>(.*?)
|\1|g' \
+ | perl -pe 's|(.*?)
|\2|g' \
+ | perl -pe 's|\r*(?:(?!
).)+\r*||g' \
| tr '\r' '\n' \
| sed 's/
/ /g' \
| sed 's/colspan="[^"]*"//g' \
- | readability "file://$(realpath $f)" \
| tr '\n' '\r' \
- | perl -pe 's|([^<]*)
|\2|g' \
+ | perl -pe 's|(.*?)
|\2|g' \
| perl -pe 's|\r*(?:(?!
).)+\r*||g' \
| tr '\r' '\n' \
| pandoc \
@@ -290,7 +291,18 @@ html_to_markdown() {
| sed 's/::://g' \
| sed 's/&.dquo;/"/g' \
| sed "s/&.squo;/'/g" \
- | tr '\r' '\n'
+ | sed 's/—/--/g' \
+ | sed 's/–/-/g' \
+ | sed 's/×/*/g' \
+ | sed 's///g' \
+ | sed 's/{\.[^}]*}//g' \
+ | tr '\r' '\n' \
+ | sed 's/===+/===|/g' \
+ | sed 's/+===/|===/g' \
+ | sed 's/---+/---|/g' \
+ | sed 's/+---/|---/g' \
+ | grep -v '^|-[-|]*-|$' \
+ | sed '/^|[=|]*|$/s/=/-/g' \
#| sed 's/:::\(.*\):::/```\1```/g' \