master
Bel LaPointe 2022-02-01 14:12:06 -07:00
parent c9e3f4e416
commit 165f85a443
2 changed files with 15 additions and 15 deletions

View File

@ -40,12 +40,12 @@ crawl() {
} }
_crawl() { _crawl() {
log crawling $* log "crawling $*"
local id="$1" local id="$1"
local json="$(notea get "$id")" local json="$(notea get "$id")"
local content="$(echo "$json" | jq -r .content)" local content="$(echo "$json" | jq -r .content)"
if ! is_crawlable "$content"; then if ! is_crawlable "$content"; then
log $content is not crawlable log "$content is not crawlable"
return 0 return 0
fi fi
local crawlable_source="$(extract_crawlable_source "$content")" local crawlable_source="$(extract_crawlable_source "$content")"
@ -60,7 +60,7 @@ _crawl() {
} }
#######################################
# Print the crawlable source named on the first line of the given content.
# The source is the last whitespace-separated field of that first line,
# with one leading '<' and one trailing '>' removed (e.g. "<https://x>").
# Arguments: $* - note content; all arguments are joined into one string
# Outputs:   the extracted source on stdout (an empty line if line 1 is blank)
#######################################
extract_crawlable_source() {
  # printf instead of echo: content that happens to look like an echo flag
  # (e.g. "-n") must be treated as data, not as an option.
  # A single awk replaces the head | awk | sed | sed chain; it reads all
  # input (no early exit) so the writer never sees a closed pipe.
  printf '%s\n' "$*" | awk '
    NR == 1 {
      src = $NF
      sub(/^</, "", src)
      sub(/>$/, "", src)
      print src
    }'
}
crawl_with() { crawl_with() {
@ -73,12 +73,12 @@ crawl_with() {
local expanded=($($backend expand "$crawlable_source")) local expanded=($($backend expand "$crawlable_source"))
log expand $crawlable_source: log expand $crawlable_source:
for i in $(seq 1 $(("${#expanded[@]}"-1))); do for i in $(seq 1 $(("${#expanded[@]}"-1))); do
local title="$(echo "${expanded[i]}" | base64 --decode)" export TITLE="$(echo "${expanded[i]}" | base64 --decode)"
CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" \ export CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")"
ID="$(echo "$crawlable_source/$title" | base64)" \ export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
PID="$(echo $json | jq -r .id)" \ export PID="$(echo $json | jq -r .id)"
TITLE="$title" \ log " $PID/$ID ($TITLE): ${#CONTENT}"
notea put notea put
done done
log not impl crawl with log not impl crawl with

View File

@ -41,20 +41,20 @@ notea() (
local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)" local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)"
local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)" local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)"
local request="$(echo '{ local request="$(echo '{
"content": '$(echo "$CONTENT" | jq -Rs)', "content": '"$(printf "%s\n" "$CONTENT" | jq -Rs)"',
"deleted": 0, "deleted": 0,
"id": '$(echo "$ID" | jq -R)', "id": '"$(echo "$ID" | jq -R)"',
"pid": '$(echo "$PID" | jq -R)', "pid": '"$(echo "$PID" | jq -R)"',
"pinned": 0, "pinned": 0,
"shared": 0, "shared": 0,
"title": '$(echo "$TITLE" | jq -R)' "title": '"$(echo "$TITLE" | jq -R)"'
}' | jq -c .)" }' | jq -c .)"
ncurl \ echo "$request" | ncurl \
-X POST \ -X POST \
-H "$xsrf_key: $xsrf_token" \ -H "$xsrf_key: $xsrf_token" \
-b "$xsrf_cookie" \ -b "$xsrf_cookie" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d "$request" \ -d @- \
$NOTEA_ADDR/api/notes \ $NOTEA_ADDR/api/notes \
| grep -q "$ID" | grep -q "$ID"
} }