fix for credits, add wip for not redoing books
parent
d6e7061738
commit
b4335f764a
17
poc.sh
17
poc.sh
|
|
@ -6,9 +6,11 @@ main() {
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
#set -x
|
#set -x
|
||||||
|
|
||||||
local output="$(mktemp -d)"
|
local output="${1:-$(mktemp -d)}"
|
||||||
log "$output"
|
log "$output"
|
||||||
|
|
||||||
|
#scrape_book_chapter https://www.dndbeyond.com/compendium/rules/phb/credits | less
|
||||||
|
|
||||||
#list_chapters_in_book https://www.dndbeyond.com/sources/sdw
|
#list_chapters_in_book https://www.dndbeyond.com/sources/sdw
|
||||||
|
|
||||||
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/races | less
|
#scrape_book_chapter https://www.dndbeyond.com/sources/phb/races | less
|
||||||
|
|
@ -20,7 +22,6 @@ main() {
|
||||||
book="${book%/}"
|
book="${book%/}"
|
||||||
log book=$book
|
log book=$book
|
||||||
scrape_book "$output/${book##*/}" "$book"
|
scrape_book "$output/${book##*/}" "$book"
|
||||||
break
|
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -46,7 +47,13 @@ scrape_book() {
|
||||||
local output="$1"
|
local output="$1"
|
||||||
local book_url="$2"
|
local book_url="$2"
|
||||||
|
|
||||||
|
if [ -d "$output" ] && ls "$output"/* && [ ! -f "$output/.wip" ]; then
|
||||||
|
log "$book_url already in $output"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
mkdir -p "$output"
|
mkdir -p "$output"
|
||||||
|
touch "$output/.wip"
|
||||||
local f="$(mktemp)"
|
local f="$(mktemp)"
|
||||||
|
|
||||||
for chapter in $(list_chapters_in_book "$book_url"); do
|
for chapter in $(list_chapters_in_book "$book_url"); do
|
||||||
|
|
@ -61,7 +68,10 @@ scrape_book() {
|
||||||
mv "$f" "$output"/"$title"
|
mv "$f" "$output"/"$title"
|
||||||
done
|
done
|
||||||
|
|
||||||
rm "$f"
|
if [ -f "$f" ]; then
|
||||||
|
rm "$f"
|
||||||
|
fi
|
||||||
|
rm "$output/.wip"
|
||||||
}
|
}
|
||||||
|
|
||||||
scrape_book_chapter() {
|
scrape_book_chapter() {
|
||||||
|
|
@ -122,6 +132,7 @@ html_to_markdown() {
|
||||||
(echo "$out" | grep -q '^##* Appendix' && echo Appendix) \
|
(echo "$out" | grep -q '^##* Appendix' && echo Appendix) \
|
||||||
|| (echo "$out" | grep -q '^##* Chapter' && echo Chapter) \
|
|| (echo "$out" | grep -q '^##* Chapter' && echo Chapter) \
|
||||||
|| (echo "$out" | grep -q '^##* Introduction' && echo Introduction) \
|
|| (echo "$out" | grep -q '^##* Introduction' && echo Introduction) \
|
||||||
|
|| (echo "$out" | grep -q '^##* Credits' && echo Credits) \
|
||||||
|| echo Chapter
|
|| echo Chapter
|
||||||
)"
|
)"
|
||||||
echo "$out" \
|
echo "$out" \
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue