only hrefs when seeking chapter list
parent
1d21cead02
commit
95a66d1a18
21
poc.sh
21
poc.sh
|
|
@ -10,6 +10,11 @@ main() {
|
||||||
|
|
||||||
#set -x
|
#set -x
|
||||||
|
|
||||||
|
#list_chapters_in_book https://www.dndbeyond.com/sources/sacoc
|
||||||
|
#list_chapters_in_book https://www.dndbeyond.com/sources/twbtw
|
||||||
|
#list_chapters_in_book https://www.dndbeyond.com/sources/hftt
|
||||||
|
#list_chapters_in_book https://www.dndbeyond.com/sources/sdw
|
||||||
|
|
||||||
#should_scrape_book_chapter https://www.dndbeyond.com/sources/ai/credits || true
|
#should_scrape_book_chapter https://www.dndbeyond.com/sources/ai/credits || true
|
||||||
#scrape_book_chapter https://www.dndbeyond.com/sources/ai/credits
|
#scrape_book_chapter https://www.dndbeyond.com/sources/ai/credits
|
||||||
|
|
||||||
|
|
@ -22,7 +27,6 @@ main() {
|
||||||
|
|
||||||
#scrape_book_chapter https://www.dndbeyond.com/compendium/rules/phb/credits | less
|
#scrape_book_chapter https://www.dndbeyond.com/compendium/rules/phb/credits | less
|
||||||
|
|
||||||
#list_chapters_in_book https://www.dndbeyond.com/sources/sdw
|
|
||||||
|
|
||||||
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/races | less
|
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/races | less
|
||||||
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/appendix-a-conditions | less
|
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/appendix-a-conditions | less
|
||||||
|
|
@ -106,12 +110,23 @@ scrape_book_chapter() {
|
||||||
|
|
||||||
list_chapters_in_book() {
|
list_chapters_in_book() {
|
||||||
local book_url="${1%/}"
|
local book_url="${1%/}"
|
||||||
|
local book_domain="${book_url%%.com/*}.com"
|
||||||
|
log domain=$book_domain
|
||||||
|
local book_url_path="${book_url##*.com}"
|
||||||
|
book_url_path="${book_url_path%#*}"
|
||||||
|
|
||||||
|
if ! should_scrape_book_chapter "$book_url"; then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
local raw="$(scrape_dndbeyond "$book_url")"
|
local raw="$(scrape_dndbeyond "$book_url")"
|
||||||
|
|
||||||
if echo "$raw" | grep -q 'https:..www.dndbeyond.com.compendium[^"]*'; then
|
if echo "$raw" | grep 'href="https:..www.dndbeyond.com.compendium[^"]*' &> /dev/null; then
|
||||||
echo "$raw" | grep -o 'https:..www.dndbeyond.com.compendium[^"]*'
|
echo "$raw" | grep -o 'https:..www.dndbeyond.com.compendium[^"]*'
|
||||||
elif echo "$raw" | grep -q "$book_url/"; then
|
elif echo "$raw" | grep "href=\"$book_url/" &> /dev/null; then
|
||||||
echo "$raw" | grep -o "${book_url//\//.}\/[^\"]*"
|
echo "$raw" | grep -o "${book_url//\//.}\/[^\"]*"
|
||||||
|
elif echo "$raw" | grep "href=\"${book_url_path//\//.}\\/" &> /dev/null; then
|
||||||
|
echo "$raw" | grep -o "href=\"${book_url_path//\//.}\/[^\"]*" | sed 's/href=\"//' | sed "s/^/${book_domain//\//\\/}/g"
|
||||||
else
|
else
|
||||||
echo "ERROR: FOUND NO CHAPTERS IN $book_url" >&2
|
echo "ERROR: FOUND NO CHAPTERS IN $book_url" >&2
|
||||||
return 1
|
return 1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue