diff --git a/app/crawler/main.sh b/app/crawler/main.sh index 185e89d..48593c1 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -40,12 +40,12 @@ crawl() { } _crawl() { - log crawling $* + log "crawling $*" local id="$1" local json="$(notea get "$id")" local content="$(echo "$json" | jq -r .content)" if ! is_crawlable "$content"; then - log $content is not crawlable + log "$content is not crawlable" return 0 fi local crawlable_source="$(extract_crawlable_source "$content")" @@ -60,7 +60,7 @@ _crawl() { } extract_crawlable_source() { - echo "$*" | head -n 1 | awk '{print $NF}' + echo "$*" | head -n 1 | awk '{print $NF}' | sed 's/^$//' } crawl_with() { @@ -73,12 +73,12 @@ crawl_with() { local expanded=($($backend expand "$crawlable_source")) log expand $crawlable_source: for i in $(seq 1 $(("${#expanded[@]}"-1))); do - local title="$(echo "${expanded[i]}" | base64 --decode)" - CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" \ - ID="$(echo "$crawlable_source/$title" | base64)" \ - PID="$(echo $json | jq -r .id)" \ - TITLE="$title" \ - notea put + export TITLE="$(echo "${expanded[i]}" | base64 --decode)" + export CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" + export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')" + export PID="$(echo $json | jq -r .id)" + log " $PID/$ID ($TITLE): ${#CONTENT}" + notea put done log not impl crawl with diff --git a/app/crawler/notea.sh b/app/crawler/notea.sh index 4ec5ebe..474ea35 100644 --- a/app/crawler/notea.sh +++ b/app/crawler/notea.sh @@ -41,20 +41,20 @@ notea() ( local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)" local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)" local request="$(echo '{ - "content": '$(echo "$CONTENT" | jq -Rs)', + "content": '"$(printf "%s\n" "$CONTENT" | jq -Rs)"', "deleted": 0, - "id": '$(echo "$ID" | jq -R)', - "pid": '$(echo "$PID" | jq -R)', + "id": '"$(echo "$ID" | jq -R)"', + "pid": '"$(echo "$PID" | jq -R)"', "pinned": 0, "shared": 0, - "title": '$(echo "$TITLE" | jq -R)' + "title": '"$(echo "$TITLE" | jq -R)"' }' | jq -c .)" - ncurl \ + echo "$request" | ncurl \ -X POST \ -H "$xsrf_key: $xsrf_token" \ -b "$xsrf_cookie" \ -H "Content-Type: application/json" \ - -d "$request" \ + -d @- \ $NOTEA_ADDR/api/notes \ | grep -q "$ID" }