From 92226f9aea91ce0ebe3b7ede5981f9559f5c66e4 Mon Sep 17 00:00:00 2001 From: Bel LaPointe Date: Thu, 10 Feb 2022 08:35:42 -0700 Subject: [PATCH] sparkles --- app/crawler/main.sh | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/app/crawler/main.sh b/app/crawler/main.sh index 7a0243f..a780b45 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -41,7 +41,7 @@ crawl() { } _crawl() { - log "crawling $*" + log "crawling? $*" local id="$1" local content="$(notes get "$id")" local json="$( @@ -77,21 +77,42 @@ crawl_with() { local expanded=($($backend expand "$crawlable_source")) log expand $crawlable_source: + notes_mkdir_p() { + local id="$1" + local subtitle="$2" + if ! notes get "$id" | grep -q '^404 page not found$'; then + return + fi + notes put "$id" "$subtitle" "autogenerated content" + } one() { + encode() { + base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n' + } local i="$1" export TITLE="$( echo "$i" | base64 --decode )" + TITLE="${TITLE##*/}" export CONTENT="$( $backend get "$crawlable_source" "$i" )" + local pid="$(echo $json | jq -r .id)" export ID="$( - echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}' + local sum="$pid/" + local title_so_far="" + for subtitle in $(echo "$i" | base64 --decode | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do + local subtitle="$(echo "$subtitle" | base64 --decode)" + if [ -n "$title_so_far" ]; then + notes_mkdir_p "${sum%/}" "${title_so_far%/}" >&2 + fi + sum+="$(echo "$subtitle" | encode)/" + title_so_far+="$subtitle/" + done + echo "$sum" )" - export PID="$( - echo $json | jq -r .id - )" - log " $PID/$ID ($TITLE): ${#CONTENT}" + ID="${ID%/}" + log " $ID ($TITLE): ${#CONTENT}" push_crawled "$PID/$ID" "$TITLE" "$CONTENT" } for i in $(seq 1 $(("${#expanded[@]}"-1))); do