scrape_dndbeyond func

master
Bel LaPointe 2021-12-14 21:52:21 -07:00
parent 1fc54a4380
commit 0ca8669db3
1 changed file with 29 additions and 24 deletions

25
poc.sh
View File

@@ -5,8 +5,21 @@ main() {
} }
original() { original() {
d=$(mktemp -d) for url in $(
foo() { scrape_dndbeyond https://www.dndbeyond.com/sources/phb \
| grep -o 'https:..www.dndbeyond.com.compendium[^"]*' \
| sed 's/#.*//' \
| sort -u
); do
echo $url
scrape_dndbeyond $url \
| html_to_markdown \
| less
break
done
}
scrape_dndbeyond() {
curl -L -sS \ curl -L -sS \
"${1:-https://www.dndbeyond.com/sources/phb}" \ "${1:-https://www.dndbeyond.com/sources/phb}" \
--compressed \ --compressed \
@@ -25,14 +38,6 @@ original() {
-H 'Pragma: no-cache' \ -H 'Pragma: no-cache' \
-H 'Cache-Control: no-cache' \ -H 'Cache-Control: no-cache' \
-H 'TE: trailers'; -H 'TE: trailers';
}
for url in $(foo | grep -o 'https:..www.dndbeyond.com.compendium[^"]*' | sed 's/#.*//' | sort -u); do
echo $url
foo $url \
| html_to_markdown \
| less || notes-server -root $d
break
done
} }
html_to_markdown() { html_to_markdown() {