diff --git a/app/crawler/main.sh b/app/crawler/main.sh index ea232df..f02ec98 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -3,9 +3,7 @@ main() { config for id in $(ids); do - if should_crawl "$id"; then - crawl "$id" - fi + crawl "$id" rewrite "$id" done } @@ -15,7 +13,6 @@ config() { set -e export CACHE="${CACHE:-"$(mktemp -d)"}" export CACHE_DURATION=$((60*5)) - export CRAWL_INTERVAL=$((60*5)) export NOTEA_ADDR="${NOTEA_ADDR:-"http://localhost:3000"}" export GITLAB_PAT="$GITLAB_PAT" source ./gitlab.sh @@ -29,20 +26,15 @@ ids() { notea ids } -should_crawl() { - local f="$CACHE/crawled.$1" - if [ ! -f "$f" ]; then - return 0 +crawl() { + local cache_key="crawled $*" + if cache get "$cache_key"; then + return fi - local last_crawled=$(date -r "$f" +%s) - local now=$(date +%s) - if ((now-last_crawled < CRAWL_INTERVAL)); then - return 0 - fi - return 1 + _crawl "$@" | cache put "$cache_key" } -crawl() { +_crawl() { local id="$1" local json="$(notea get "$id")" local content="$(echo "$json" | jq -r .content)"