startup-cto-handbook-mdbook/scrape.sh

68 lines
1.3 KiB
Bash

#! /bin/bash
#######################################
# Build the mdBook sources from the Startup CTO Handbook markdown.
#
# Fetches the handbook once (cached gzip-compressed as ./raw.md.gz), empties
# ./src, then splits the markdown into numbered chapter files ./src/NNN.txt.
# A new chapter file starts at every level-1 or level-2 heading, and each of
# those headings is appended to ./src/SUMMARY.md. Lines that precede the first
# heading land in ./src/000.txt, which is not listed in the summary.
# Finally runs `mdbook build`.
#
# The body is a subshell, so the shell options set here stay local.
# Globals:   none
# Arguments: none
# Outputs:   echoes every SUMMARY.md entry to stderr
# Returns:   non-zero if the download, the split or `mdbook build` fails
#######################################
main() (
  set -ueo pipefail

  local -r url='https://raw.githubusercontent.com/ZachGoldberg/Startup-CTO-Handbook/main/StartupCTOHandbook.md'
  local -r cache=./raw.md.gz
  # Heading marker: one or more '#' followed by a space, optionally preceded
  # by characters that are not lowercase letters.
  local -r heading_re='^[^a-z]*#+ '
  local line trimmed hashes depth indent chapter
  local n=0

  if [[ ! -f "$cache" ]]; then
    # Download into a scratch file and rename only on success: gzip emits a
    # valid (empty) archive even when wget fails, and such a file must never
    # be mistaken for a good cache on the next run.
    if ! wget -q -O - "$url" | gzip > "$cache.part"; then
      rm -f -- "$cache.part"
      printf 'download failed: %s\n' "$url" >&2
      return 1
    fi
    mv -- "$cache.part" "$cache"
  fi

  # Best effort: leftovers that rm cannot remove (e.g. sub-directories) are
  # tolerated on purpose.
  rm -f -- ./src/* || true
  mkdir -p ./src
  printf '# Summary\n\n' > ./src/SUMMARY.md

  printf -v chapter '%03d.txt' "$n"
  # `IFS=` keeps the indentation of every line intact, and the `|| [[ -n ]]`
  # clause still processes a last line that has no trailing newline.
  gzip -dc -- "$cache" \
    | while IFS= read -r line || [[ -n "$line" ]]; do
        # Headings are detected and titled on the blank-trimmed line (a plain
        # `read` trims surrounding spaces and tabs); the chapter file below
        # receives the line verbatim.
        read -r trimmed <<< "$line"
        depth=0
        if [[ "$trimmed" =~ $heading_re ]]; then
          # Depth is the number of '#' characters in the matched prefix.
          hashes=${BASH_REMATCH[0]//[^#]/}
          depth=${#hashes}
        fi
        case "$depth" in
          1 | 2)
            n=$((n + 1))
            printf -v chapter '%03d.txt' "$n"
            # NOTE(review): one space per nesting level is kept unchanged;
            # confirm mdBook treats a 1-space indent as a nested chapter.
            printf -v indent '%*s' "$((depth - 1))" ''
            # The title is everything after the first space of the heading.
            printf '%s- [%s](%s)\n' "$indent" "${trimmed#* }" "$chapter" \
              | tee -a ./src/SUMMARY.md >&2
            ;;
        esac
        # printf instead of echo: lines such as "-n" must be copied literally.
        printf '%s\n' "$line" >> "./src/$chapter"
      done

  mdbook build
)
# Emit a diagnostic line on stderr: all arguments joined by spaces, prefixed
# with "| ".
log() {
  local message="| $*"
  printf '%s\n' "$message" 1>&2
}
# Run main only when this file is executed directly, not when it is sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  main "$@"
  ret=$?
  # Report the saved status: by this point `$?` would already hold the
  # status of the assignment above (always 0), not that of main.
  echo "ret=$ret" >&2
  exit "$ret"
fi