diff --git a/crawler/main.sh b/crawler/main.sh
index b092603..16d82d7 100644
--- a/crawler/main.sh
+++ b/crawler/main.sh
@@ -20,6 +20,8 @@ config() {
   export CACHE_DURATION=$((60*50))
   export NOTES_ADDR="${NOTES_ADDR:-"http://localhost:3004"}"
   export GITLAB_PAT="$GITLAB_PAT"
+  export RCLONE_CONFIG="$RCLONE_CONFIG"
+  export RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS"
   source ./gitlab.sh
   source ./gitlab_wiki.sh
   source ./google.sh
diff --git a/crawler/rclone.sh b/crawler/rclone.sh
new file mode 100644
index 0000000..95b766f
--- /dev/null
+++ b/crawler/rclone.sh
@@ -0,0 +1,89 @@
+#! /bin/bash
+
+# Thin wrapper around the rclone binary used by the crawler.
+#
+# Usage: rclone <subcommand> [args...]   e.g. rclone get_google <drive-id>
+# Reads RCLONE_CONFIG (path of the rclone config file) and
+# RCLONE_CONFIG_PASS (its password) from the environment.
+#
+# The body is a subshell, so the helpers and variables defined inside never
+# leak into the caller. Because this function shadows the rclone binary by
+# name, the real binary must always be reached through `command rclone`.
+rclone() (
+  # Copy the Drive item with the given id into a fresh temp directory and
+  # print the path of every file that was written, one per line.
+  get_google() {
+    _rate_limit
+    local id="$1"
+    # Declared separately so a mktemp failure is not masked by `local`.
+    local out
+    out="$(mktemp -d)" || return
+    _cmd backend copyid work-notes-google: --drive-export-formats=csv,html,pdf "$id" "$out/"
+    find "$out" -type f
+  }
+
+  # Sleep 2 seconds unless the previous call (tracked through the mtime of
+  # a marker file) happened more than 2 seconds ago, then refresh the marker.
+  _rate_limit() {
+    local f="/tmp/rclone.rate.limit"
+    local last=0
+    if [ -f "$f" ]; then
+      last="$(date -r "$f" +%s)" || last=0
+    fi
+    local now
+    now="$(date +%s)"
+    local since_last=$((now-last))
+    # Local and always initialised, so a stray `dur` inherited from the
+    # caller can no longer skew the sleep time.
+    local dur=2
+    if ((since_last>2)); then
+      dur=0
+    fi
+    sleep "$dur"
+    touch "$f"
+  }
+
+  # Succeed only if an rclone executable is on PATH and can report its
+  # version. `type -P` and `command` both skip this wrapper function; a
+  # bare `rclone version` would recurse into it and always fail.
+  _ensure() {
+    type -P rclone &> /dev/null && command rclone version &> /dev/null
+  }
+
+  # Run rclone with the shared flags after checking that the
+  # work-notes-google remote is configured.
+  _cmd() {
+    if ! _ensure_google_config; then
+      echo "rclone: remote work-notes-google not found in config" >&2
+      return 1
+    fi
+    __cmd "$@"
+  }
+
+  # Run the real rclone binary with the crawler's config and shared flags.
+  __cmd() {
+    if ! _ensure; then
+      echo "rclone: binary not found on PATH or not runnable" >&2
+      return 1
+    fi
+    RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS" \
+      command rclone \
+      --config "$RCLONE_CONFIG" \
+      --size-only \
+      --fast-list \
+      --retries 10 \
+      --retries-sleep 10s \
+      "$@"
+  }
+
+  # Succeed only if the rclone config mentions the work-notes-google remote.
+  _ensure_google_config() {
+    # Capture first rather than piping into `grep -q`: grep exiting early
+    # could otherwise kill rclone with SIGPIPE and fail under pipefail.
+    local shown
+    shown="$(__cmd config show)" || return
+    grep -q work-notes-google <<< "$shown"
+  }
+
+  "$@"
+)