sparkles
parent
f190bdecca
commit
92226f9aea
|
|
@ -41,7 +41,7 @@ crawl() {
|
|||
}
|
||||
|
||||
_crawl() {
|
||||
log "crawling $*"
|
||||
log "crawling? $*"
|
||||
local id="$1"
|
||||
local content="$(notes get "$id")"
|
||||
local json="$(
|
||||
|
|
@ -77,21 +77,42 @@ crawl_with() {
|
|||
|
||||
local expanded=($($backend expand "$crawlable_source"))
|
||||
log expand $crawlable_source:
|
||||
# notes_mkdir_p ID SUBTITLE
# Ensures something exists at note ID: when `notes get ID` prints the
# "404 page not found" line, a placeholder is written with `notes put`;
# otherwise nothing is changed.
#   $1 - note id to check and, if missing, create
#   $2 - passed as the second argument to `notes put`
#        (presumably the note title - confirm against `notes put`)
notes_mkdir_p() {
|
||||
local id="$1"
|
||||
local subtitle="$2"
|
||||
# Absence is detected by matching a whole output line against the literal
# 404 text; any other output is treated as "already exists".
if ! notes get "$id" | grep -q '^404 page not found$'; then
|
||||
return
|
||||
fi
|
||||
# Only reached when the 404 line was seen: write the placeholder body.
notes put "$id" "$subtitle" "autogenerated content"
|
||||
}
|
||||
one() {
|
||||
# encode
# Filter: reads stdin, base64-encodes it, takes the md5sum of that encoding
# and prints the first 10 characters of the md5sum output line, with no
# trailing newline.
encode() {
|
||||
# cut keeps characters 1-10 of the md5sum line; awk then prints the first
# whitespace-separated field and tr removes the newline.
base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n'
|
||||
}
|
||||
local i="$1"
|
||||
export TITLE="$(
|
||||
echo "$i" | base64 --decode
|
||||
)"
|
||||
TITLE="${TITLE##*/}"
|
||||
export CONTENT="$(
|
||||
$backend get "$crawlable_source" "$i"
|
||||
)"
|
||||
local pid="$(echo $json | jq -r .id)"
|
||||
export ID="$(
|
||||
echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}'
|
||||
local sum="$pid/"
|
||||
local title_so_far=""
|
||||
for subtitle in $(echo "$i" | base64 --decode | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do
|
||||
local subtitle="$(echo "$subtitle" | base64 --decode)"
|
||||
if [ -n "$title_so_far" ]; then
|
||||
notes_mkdir_p "${sum%/}" "${title_so_far%/}" >&2
|
||||
fi
|
||||
sum+="$(echo "$subtitle" | encode)/"
|
||||
title_so_far+="$subtitle/"
|
||||
done
|
||||
echo "$sum"
|
||||
)"
|
||||
export PID="$(
|
||||
echo $json | jq -r .id
|
||||
)"
|
||||
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
||||
ID="${ID%/}"
|
||||
log " $ID ($TITLE): ${#CONTENT}"
|
||||
push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
|
||||
}
|
||||
for i in $(seq 1 $(("${#expanded[@]}"-1))); do
|
||||
|
|
|
|||
Loading…
Reference in New Issue