rate limit
parent
0ca8669db3
commit
52623ed8d9
22
poc.sh
22
poc.sh
|
|
@ -1,10 +1,10 @@
|
||||||
#! /bin/bash
|
#! /bin/bash
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
original "$@"
|
_main "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
original() {
|
_main() {
|
||||||
for url in $(
|
for url in $(
|
||||||
scrape_dndbeyond https://www.dndbeyond.com/sources/phb \
|
scrape_dndbeyond https://www.dndbeyond.com/sources/phb \
|
||||||
| grep -o 'https:..www.dndbeyond.com.compendium[^"]*' \
|
| grep -o 'https:..www.dndbeyond.com.compendium[^"]*' \
|
||||||
|
|
@ -20,6 +20,7 @@ original() {
|
||||||
}
|
}
|
||||||
|
|
||||||
scrape_dndbeyond() {
|
scrape_dndbeyond() {
|
||||||
|
rate_limit_1s scrape_dndbeyond
|
||||||
curl -L -sS \
|
curl -L -sS \
|
||||||
"${1:-https://www.dndbeyond.com/sources/phb}" \
|
"${1:-https://www.dndbeyond.com/sources/phb}" \
|
||||||
--compressed \
|
--compressed \
|
||||||
|
|
@ -51,6 +52,23 @@ html_to_markdown() {
|
||||||
| sed "s/\\\\\([\"']\)/\1/g"
|
| sed "s/\\\\\([\"']\)/\1/g"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Throttle repeated calls that share a name.
#
# A timestamp file "<TMPDIR or /tmp>/rate_limited/<name>" records when the
# name was last allowed through. If that file's modification time is more
# recent than the interval, this function sleeps in one-second steps until
# the interval has elapsed, then refreshes the timestamp.
#
# NOTE: despite the "_1s" in the name, the enforced interval is 2 seconds;
# the value is kept as-is so existing callers see the same pacing.
#
# Globals:   TMPDIR (read; falls back to /tmp)
# Arguments: $1 - name identifying the rate-limited operation
# Outputs:   two progress lines on stderr
# Returns:   status of the final touch (non-zero if mkdir fails)
#######################################
rate_limit_1s() {
  local name="$1"
  local dir="${TMPDIR:-/tmp}/rate_limited"
  local stamp="$dir/$name"
  local interval=2
  # 0 means "never ran": the elapsed time is then far larger than the interval.
  local last_run=0

  mkdir -p "$dir" || return

  # Only ask date for the mtime when the stamp exists; querying a missing
  # file printed an error on every first run and left last_run empty.
  if [ -f "$stamp" ]; then
    # Declared separately above so a date failure is not masked by 'local';
    # on failure fall back to "never ran" rather than blocking the caller.
    last_run="$(date -r "$stamp" +%s)" || last_run=0
  fi

  printf '%s\n' "$(date +%s): should sleep while $(($(date +%s) - last_run)) < $interval" >&2
  while [ "$(($(date +%s) - last_run))" -lt "$interval" ]; do
    sleep 1
  done
  printf '%s\n' "$(date +%s): done sleeping" >&2

  touch "$stamp"
}
|
||||||
|
|
||||||
if [ "$0" == "$BASH_SOURCE" ]; then
|
if [ "$0" == "$BASH_SOURCE" ]; then
|
||||||
main "$@"
|
main "$@"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue