mvp
parent
c9e3f4e416
commit
165f85a443
|
|
@ -40,12 +40,12 @@ crawl() {
|
||||||
}
|
}
|
||||||
|
|
||||||
_crawl() {
|
_crawl() {
|
||||||
log crawling $*
|
log "crawling $*"
|
||||||
local id="$1"
|
local id="$1"
|
||||||
local json="$(notea get "$id")"
|
local json="$(notea get "$id")"
|
||||||
local content="$(echo "$json" | jq -r .content)"
|
local content="$(echo "$json" | jq -r .content)"
|
||||||
if ! is_crawlable "$content"; then
|
if ! is_crawlable "$content"; then
|
||||||
log $content is not crawlable
|
log "$content is not crawlable"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
local crawlable_source="$(extract_crawlable_source "$content")"
|
local crawlable_source="$(extract_crawlable_source "$content")"
|
||||||
|
|
@ -60,7 +60,7 @@ _crawl() {
|
||||||
}
|
}
|
||||||
|
|
||||||
extract_crawlable_source() {
|
extract_crawlable_source() {
|
||||||
echo "$*" | head -n 1 | awk '{print $NF}'
|
echo "$*" | head -n 1 | awk '{print $NF}' | sed 's/^<//' | sed 's/>$//'
|
||||||
}
|
}
|
||||||
|
|
||||||
crawl_with() {
|
crawl_with() {
|
||||||
|
|
@ -73,11 +73,11 @@ crawl_with() {
|
||||||
local expanded=($($backend expand "$crawlable_source"))
|
local expanded=($($backend expand "$crawlable_source"))
|
||||||
log expand $crawlable_source:
|
log expand $crawlable_source:
|
||||||
for i in $(seq 1 $(("${#expanded[@]}"-1))); do
|
for i in $(seq 1 $(("${#expanded[@]}"-1))); do
|
||||||
local title="$(echo "${expanded[i]}" | base64 --decode)"
|
export TITLE="$(echo "${expanded[i]}" | base64 --decode)"
|
||||||
CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")" \
|
export CONTENT="$($backend get "$crawlable_source" "${expanded[i]}")"
|
||||||
ID="$(echo "$crawlable_source/$title" | base64)" \
|
export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
|
||||||
PID="$(echo $json | jq -r .id)" \
|
export PID="$(echo $json | jq -r .id)"
|
||||||
TITLE="$title" \
|
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
||||||
notea put
|
notea put
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,20 +41,20 @@ notea() (
|
||||||
local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)"
|
local xsrf_token="$(echo "$contains_tokens" | grep -o '"csrfToken":[^,]*' | tr ':' '\n' | jq -r . | tail -n 1)"
|
||||||
local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)"
|
local xsrf_cookie="$(echo "$contains_tokens" | grep ^set.cookie: | sed 's/^set.cookie: //' | tr ';' '\n' | head -n 1)"
|
||||||
local request="$(echo '{
|
local request="$(echo '{
|
||||||
"content": '$(echo "$CONTENT" | jq -Rs)',
|
"content": '"$(printf "%s\n" "$CONTENT" | jq -Rs)"',
|
||||||
"deleted": 0,
|
"deleted": 0,
|
||||||
"id": '$(echo "$ID" | jq -R)',
|
"id": '"$(echo "$ID" | jq -R)"',
|
||||||
"pid": '$(echo "$PID" | jq -R)',
|
"pid": '"$(echo "$PID" | jq -R)"',
|
||||||
"pinned": 0,
|
"pinned": 0,
|
||||||
"shared": 0,
|
"shared": 0,
|
||||||
"title": '$(echo "$TITLE" | jq -R)'
|
"title": '"$(echo "$TITLE" | jq -R)"'
|
||||||
}' | jq -c .)"
|
}' | jq -c .)"
|
||||||
ncurl \
|
echo "$request" | ncurl \
|
||||||
-X POST \
|
-X POST \
|
||||||
-H "$xsrf_key: $xsrf_token" \
|
-H "$xsrf_key: $xsrf_token" \
|
||||||
-b "$xsrf_cookie" \
|
-b "$xsrf_cookie" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "$request" \
|
-d @- \
|
||||||
$NOTEA_ADDR/api/notes \
|
$NOTEA_ADDR/api/notes \
|
||||||
| grep -q "$ID"
|
| grep -q "$ID"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue