impl crawler rclone wrapper to get google files by id

master
Bel LaPointe 2022-02-16 13:53:01 -07:00
parent 3774d3eba1
commit c85813ad76
2 changed files with 56 additions and 0 deletions

View File

@ -20,6 +20,8 @@ config() {
export CACHE_DURATION=$((60*50)) export CACHE_DURATION=$((60*50))
export NOTES_ADDR="${NOTES_ADDR:-"http://localhost:3004"}" export NOTES_ADDR="${NOTES_ADDR:-"http://localhost:3004"}"
export GITLAB_PAT="$GITLAB_PAT" export GITLAB_PAT="$GITLAB_PAT"
export RCLONE_CONFIG="$RCLONE_CONFIG"
export RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS"
source ./gitlab.sh source ./gitlab.sh
source ./gitlab_wiki.sh source ./gitlab_wiki.sh
source ./google.sh source ./google.sh

54
crawler/rclone.sh Normal file
View File

@ -0,0 +1,54 @@
#! /bin/bash
# Wrapper around the rclone binary, used to fetch Google Drive files by id.
#
# The body is a subshell, so the helper functions and variables defined here
# never leak into the calling shell. The arguments are dispatched as
# "<subcommand> [args...]", e.g. `rclone get_google <file-id>`.
#
# NOTE: this function shadows the real rclone executable, so every call to
# the binary inside it must go through `command rclone`; a bare `rclone`
# would re-enter this wrapper.
#
# Globals:
#   RCLONE_CONFIG       (read) path handed to rclone via --config
#   RCLONE_CONFIG_PASS  (read) passed to rclone through its environment
# Arguments:
#   $1    name of one of the helper functions below
#   $2..  arguments for that helper
# Returns:
#   exit status of the selected helper (0 when no subcommand is given)
rclone() (
  # Name of the rclone remote used for Google Drive access.
  local -r remote="work-notes-google"

  # Copy the Drive file with id $1 into a fresh temp dir and print the paths
  # of the downloaded files, one per line. The directory is left in place on
  # success because the printed paths point into it.
  get_google() {
    _rate_limit
    local id="$1"
    local out rc
    # Declaration is split from assignment so a mktemp failure is not masked.
    out="$(mktemp -d)" || return
    if ! _cmd backend copyid "${remote}:" \
      --drive-export-formats=csv,html,pdf "$id" "$out/"; then
      rc=$?
      # Nothing useful was produced; do not leave the temp dir behind.
      rm -rf -- "${out:?}"
      return "$rc"
    fi
    find "$out" -type f
  }

  # Sleep 2 seconds when the previous call happened within the last
  # 2 seconds. The timestamp is the mtime of a marker file in /tmp, which is
  # refreshed on every call.
  _rate_limit() {
    local f="/tmp/rclone.rate.limit"
    local last=0
    local now since_last
    # Explicitly local and initialised: the subshell inherits the caller's
    # variables, so a stray `dur` from outside must not change the delay.
    local dur=2
    if [ -f "$f" ]; then
      last="$(date -r "$f" +%s 2> /dev/null)" || last=0
    fi
    now="$(date +%s)"
    since_last=$((now - ${last:-0}))
    if ((since_last > 2)); then
      dur=0
    fi
    sleep "$dur"
    touch "$f"
  }

  # Succeed only when a real rclone executable is on PATH and runs.
  # `type -P` forces a PATH lookup (ignoring this wrapper function) and
  # `command` bypasses function lookup when running it.
  _ensure() {
    type -P rclone > /dev/null && command rclone version &> /dev/null
  }

  # Run an rclone command after checking that the Google remote is configured.
  _cmd() {
    if ! _ensure_google_config; then
      printf 'rclone: remote "%s" not found in config\n' "$remote" >&2
      return 1
    fi
    __cmd "$@"
  }

  # Run the rclone binary with the shared config and retry flags, followed by
  # the caller's arguments.
  __cmd() {
    if ! _ensure; then
      printf 'rclone: binary not found or not runnable\n' >&2
      return 1
    fi
    RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS" \
      command rclone \
      --config "$RCLONE_CONFIG" \
      --size-only \
      --fast-list \
      --retries 10 \
      --retries-sleep 10s \
      "$@"
  }

  # Succeed when `rclone config show` mentions the Google remote.
  _ensure_google_config() {
    __cmd config show | grep -q -- "$remote"
  }

  # Dispatch to the requested helper.
  "$@"
)