# Patch for app/crawler/main.sh (unified diff). Lines before "diff --git"
# are preamble and are skipped by patch tools.
#
# What this patch does:
#   * crawl(): prefixes the cache lookup with `false &&`, so `cache get` is
#     never run, the early return is never taken, and `_crawl ... | cache put`
#     runs on every call.
#   * crawl_with(): moves the per-entry loop body into a nested helper one(),
#     which takes the expanded entry as $1 and passes "$PID/$ID", "$TITLE" and
#     "$CONTENT" to push_crawled as arguments.
#   * push_crawled(): forwards its arguments to `notes put`.
#
# Review changes relative to the patch as first written:
#   * `echo $json | jq` expanded $json unquoted (word splitting and globbing
#     on the JSON text); it is now `printf '%s\n' "$json" | jq`.
#   * The bare `# TODO` now states what is disabled.
#   * one() got a short header comment (new-side hunk count 30 -> 32).
#
# NOTE(review): the patch text was recovered from a whitespace-collapsed copy.
# Indentation (2 spaces assumed) and the exact position of blank context lines
# were reconstructed from the hunk line counts -- confirm against the real
# file, or apply with whitespace tolerance.
# NOTE(review): the "index" line still carries the blob ids of the patch as
# first written; the post-image id will differ after the edits listed above.
# NOTE(review): the loop still starts at index 1, so expanded[0] is never
# processed (same as before this patch) -- presumably intentional; confirm.
diff --git a/app/crawler/main.sh b/app/crawler/main.sh
index 14809fb..46f5f68 100644
--- a/app/crawler/main.sh
+++ b/app/crawler/main.sh
@@ -33,7 +33,8 @@ ids() {
 
 crawl() {
   local cache_key="crawled $*"
-  if cache get "$cache_key"; then
+  # TODO: 'false &&' skips the cache lookup so _crawl always runs; remove it to re-enable.
+  if false && cache get "$cache_key"; then
     return
   fi
   _crawl "$@" | cache put "$cache_key"
@@ -76,18 +77,32 @@ crawl_with() {
 
   local expanded=($($backend expand "$crawlable_source"))
   log expand $crawlable_source:
-  for i in $(seq 1 $(("${#expanded[@]}"-1))); do
-    export TITLE="$(echo "${expanded[i]}" | base64 --decode)"
-    export CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")"
-    export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
-    export PID="$(echo $json | jq -r .id)"
+  # Export TITLE/CONTENT/ID/PID for one expanded entry ($1), log it, and push it.
+  # Uses $backend, $crawlable_source and $json, which are not set in here.
+  one() {
+    local i="$1"
+    export TITLE="$(
+      echo "$i" | base64 --decode
+    )"
+    export CONTENT="$(
+      $backend get "$crawlable_source" "$i"
+    )"
+    export ID="$(
+      echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}'
+    )"
+    export PID="$(
+      printf '%s\n' "$json" | jq -r .id
+    )"
     log " $PID/$ID ($TITLE): ${#CONTENT}"
-    push_crawled
+    push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
+  }
+  for i in $(seq 1 $(("${#expanded[@]}"-1))); do
+    one "${expanded[i]}"
   done
 }
 
 push_crawled() {
-  notes put
+  notes put "$@"
 }
 
 is_crawlable() {