sparkles
parent
f190bdecca
commit
92226f9aea
|
|
@ -41,7 +41,7 @@ crawl() {
|
||||||
}
|
}
|
||||||
|
|
||||||
_crawl() {
|
_crawl() {
|
||||||
log "crawling $*"
|
log "crawling? $*"
|
||||||
local id="$1"
|
local id="$1"
|
||||||
local content="$(notes get "$id")"
|
local content="$(notes get "$id")"
|
||||||
local json="$(
|
local json="$(
|
||||||
|
|
@ -77,21 +77,42 @@ crawl_with() {
|
||||||
|
|
||||||
local expanded=($($backend expand "$crawlable_source"))
|
local expanded=($($backend expand "$crawlable_source"))
|
||||||
log expand $crawlable_source:
|
log expand $crawlable_source:
|
||||||
|
notes_mkdir_p() {
|
||||||
|
local id="$1"
|
||||||
|
local subtitle="$2"
|
||||||
|
if ! notes get "$id" | grep -q '^404 page not found$'; then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
notes put "$id" "$subtitle" "autogenerated content"
|
||||||
|
}
|
||||||
one() {
|
one() {
|
||||||
|
encode() {
|
||||||
|
base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n'
|
||||||
|
}
|
||||||
local i="$1"
|
local i="$1"
|
||||||
export TITLE="$(
|
export TITLE="$(
|
||||||
echo "$i" | base64 --decode
|
echo "$i" | base64 --decode
|
||||||
)"
|
)"
|
||||||
|
TITLE="${TITLE##*/}"
|
||||||
export CONTENT="$(
|
export CONTENT="$(
|
||||||
$backend get "$crawlable_source" "$i"
|
$backend get "$crawlable_source" "$i"
|
||||||
)"
|
)"
|
||||||
|
local pid="$(echo $json | jq -r .id)"
|
||||||
export ID="$(
|
export ID="$(
|
||||||
echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}'
|
local sum="$pid/"
|
||||||
|
local title_so_far=""
|
||||||
|
for subtitle in $(echo "$i" | base64 --decode | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do
|
||||||
|
local subtitle="$(echo "$subtitle" | base64 --decode)"
|
||||||
|
if [ -n "$title_so_far" ]; then
|
||||||
|
notes_mkdir_p "${sum%/}" "${title_so_far%/}" >&2
|
||||||
|
fi
|
||||||
|
sum+="$(echo "$subtitle" | encode)/"
|
||||||
|
title_so_far+="$subtitle/"
|
||||||
|
done
|
||||||
|
echo "$sum"
|
||||||
)"
|
)"
|
||||||
export PID="$(
|
ID="${ID%/}"
|
||||||
echo $json | jq -r .id
|
log " $ID ($TITLE): ${#CONTENT}"
|
||||||
)"
|
|
||||||
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
|
||||||
push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
|
push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
|
||||||
}
|
}
|
||||||
for i in $(seq 1 $(("${#expanded[@]}"-1))); do
|
for i in $(seq 1 $(("${#expanded[@]}"-1))); do
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue