scrape_dndbeyond func
parent
1fc54a4380
commit
0ca8669db3
25
poc.sh
25
poc.sh
|
|
@ -5,8 +5,21 @@ main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
original() {
|
original() {
|
||||||
d=$(mktemp -d)
|
for url in $(
|
||||||
foo() {
|
scrape_dndbeyond https://www.dndbeyond.com/sources/phb \
|
||||||
|
| grep -o 'https:..www.dndbeyond.com.compendium[^"]*' \
|
||||||
|
| sed 's/#.*//' \
|
||||||
|
| sort -u
|
||||||
|
); do
|
||||||
|
echo $url
|
||||||
|
scrape_dndbeyond $url \
|
||||||
|
| html_to_markdown \
|
||||||
|
| less
|
||||||
|
break
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
scrape_dndbeyond() {
|
||||||
curl -L -sS \
|
curl -L -sS \
|
||||||
"${1:-https://www.dndbeyond.com/sources/phb}" \
|
"${1:-https://www.dndbeyond.com/sources/phb}" \
|
||||||
--compressed \
|
--compressed \
|
||||||
|
|
@ -25,14 +38,6 @@ original() {
|
||||||
-H 'Pragma: no-cache' \
|
-H 'Pragma: no-cache' \
|
||||||
-H 'Cache-Control: no-cache' \
|
-H 'Cache-Control: no-cache' \
|
||||||
-H 'TE: trailers';
|
-H 'TE: trailers';
|
||||||
}
|
|
||||||
for url in $(foo | grep -o 'https:..www.dndbeyond.com.compendium[^"]*' | sed 's/#.*//' | sort -u); do
|
|
||||||
echo $url
|
|
||||||
foo $url \
|
|
||||||
| html_to_markdown \
|
|
||||||
| less || notes-server -root $d
|
|
||||||
break
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
html_to_markdown() {
|
html_to_markdown() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue