From 165f85a443a245ec739ac377fa1b16e091a4d098 Mon Sep 17 00:00:00 2001 From: Bel LaPointe Date: Tue, 1 Feb 2022 14:12:06 -0700 Subject: [PATCH] mvp --- app/crawler/main.sh | 18 +++++++++--------- app/crawler/notea.sh | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/app/crawler/main.sh b/app/crawler/main.sh index 185e89d..48593c1 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -40,12 +40,12 @@ crawl() { } _crawl() { - log crawling $* + log "crawling $*" local id="$1" local json="$(notea get "$id")" local content="$(echo "$json" | jq -r .content)" if ! is_crawlable "$content"; then - log $content is not crawlable + log "$content is not crawlable" return 0 fi local crawlable_source="$(extract_crawlable_source "$content")" @@ -60,7 +60,7 @@ _crawl() { } extract_crawlable_source() { - echo "$*" | head -n 1 | awk '{print $NF}' + echo "$*" | head -n 1 | awk '{print $NF}' | sed 's/^$//' } crawl_with() { @@ -73,12 +73,12 @@ crawl_with() { local expanded=($($backend expand "$crawlable_source")) log expand $crawlable_source: for i in $(seq 1 $(("${#expanded[@]}"-1))); do - local title="$(echo "${expanded[i]}" | base64 --decode)" - CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" \ - ID="$(echo "$crawlable_source/$title" | base64)" \ - PID="$(echo $json | jq -r .id)" \ - TITLE="$title" \ - notea put + export TITLE="$(echo "${expanded[i]}" | base64 --decode)" + export CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" + export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')" + export PID="$(echo $json | jq -r .id)" + log " $PID/$ID ($TITLE): ${#CONTENT}" + notea put done log not impl crawl with diff --git a/app/crawler/notea.sh b/app/crawler/notea.sh index 4ec5ebe..474ea35 100644 --- a/app/crawler/notea.sh +++ b/app/crawler/notea.sh @@ -41,20 +41,20 @@ notea() ( local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)" local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)" local request="$(echo '{ - "content": '$(echo "$CONTENT" | jq -Rs)', + "content": '"$(printf "%s\n" "$CONTENT" | jq -Rs)"', "deleted": 0, - "id": '$(echo "$ID" | jq -R)', - "pid": '$(echo "$PID" | jq -R)', + "id": '"$(echo "$ID" | jq -R)"', + "pid": '"$(echo "$PID" | jq -R)"', "pinned": 0, "shared": 0, - "title": '$(echo "$TITLE" | jq -R)' + "title": '"$(echo "$TITLE" | jq -R)"' }' | jq -c .)" - ncurl \ + echo "$request" | ncurl \ -X POST \ -H "$xsrf_key: $xsrf_token" \ -b "$xsrf_cookie" \ -H "Content-Type: application/json" \ - -d "$request" \ + -d @- \ $NOTEA_ADDR/api/notes \ | grep -q "$ID" }