diff --git a/poc.sh b/poc.sh index fdd3758..fd8d0be 100644 --- a/poc.sh +++ b/poc.sh @@ -6,9 +6,11 @@ main() { set -o pipefail #set -x - local output="$(mktemp -d)" + local output="${1:-$(mktemp -d)}" log "$output" + #scrape_book_chapter https://www.dndbeyond.com/compendium/rules/phb/credits | less + #list_chapters_in_book https://www.dndbeyond.com/sources/sdw #scrape_book_chapter https://www.dndbeyond.com/sources/phb/races | less @@ -20,7 +22,6 @@ main() { book="${book%/}" log book=$book scrape_book "$output/${book##*/}" "$book" - break done @@ -46,7 +47,13 @@ scrape_book() { local output="$1" local book_url="$2" + if [ -d "$output" ] && ls "$output"/* && [ ! -f "$output/.wip" ]; then + log "$book_url already in $output" + return + fi + mkdir -p "$output" + touch "$output/.wip" local f="$(mktemp)" for chapter in $(list_chapters_in_book "$book_url"); do @@ -61,7 +68,10 @@ scrape_book() { mv "$f" "$output"/"$title" done - rm "$f" + if [ -f "$f" ]; then + rm "$f" + fi + rm "$output/.wip" } scrape_book_chapter() { @@ -122,6 +132,7 @@ html_to_markdown() { (echo "$out" | grep -q '^##* Appendix' && echo Appendix) \ || (echo "$out" | grep -q '^##* Chapter' && echo Chapter) \ || (echo "$out" | grep -q '^##* Introduction' && echo Introduction) \ + || (echo "$out" | grep -q '^##* Credits' && echo Credits) \ || echo Chapter )" echo "$out" \