master
Bel LaPointe 2022-02-10 08:35:42 -07:00
parent f190bdecca
commit 92226f9aea
1 changed files with 27 additions and 6 deletions

View File

@@ -41,7 +41,7 @@ crawl() {
}
_crawl() {
log "crawling $*"
log "crawling? $*"
local id="$1"
local content="$(notes get "$id")"
local json="$(
@@ -77,21 +77,42 @@ crawl_with() {
local expanded=($($backend expand "$crawlable_source"))
log expand $crawlable_source:
# notes_mkdir_p ID SUBTITLE
#
# Make sure a note exists at ID. The note is considered missing when
# `notes get ID` prints the exact line "404 page not found"; only then is it
# created through `notes put` with SUBTITLE as its title and a fixed
# placeholder body. An existing note is left untouched.
#
# Arguments: $1 - note id to check / create
#            $2 - title to use if the note has to be created
# Returns:   0 when the note already exists, otherwise the status of `notes put`
notes_mkdir_p() {
  local note_id="$1"
  local note_title="$2"
  # Positive form of the existence probe: act only on the 404 marker.
  if notes get "$note_id" | grep -q '^404 page not found$'; then
    notes put "$note_id" "$note_title" "autogenerated content"
  fi
}
# one ENCODED_PATH
#
# Process a single crawled entry. $1 is a base64-encoded path (it is decoded
# with `base64 --decode` below). The function derives TITLE, CONTENT, ID and
# PID from it, exports them, logs a summary and hands the result to
# push_crawled.
#
# Reads from the enclosing scope: $backend, $crawlable_source, $json.
# Calls helpers defined outside this block: notes_mkdir_p, log, push_crawled.
one() {
# encode: read stdin, base64-encode it, md5sum the result and print the
# first 10 characters of the digest line with the trailing newline removed.
encode() {
base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n'
}
local i="$1"
# TITLE starts as the full decoded path ...
export TITLE="$(
echo "$i" | base64 --decode
)"
# ... and is then reduced to its last '/'-separated component.
TITLE="${TITLE##*/}"
# CONTENT is whatever the backend returns for this (still encoded) entry.
export CONTENT="$(
$backend get "$crawlable_source" "$i"
)"
# Parent id: the .id field of $json. $json is expanded unquoted, so it is
# word-split before reaching jq.
local pid="$(echo $json | jq -r .id)"
# ID is captured from the stdout of the substitution below, which does two
# things as written:
#   1. echoes an md5 of base64 of "$crawlable_source/$TITLE";
#   2. walks the decoded path component by component, starting from "$pid/"
#      and appending "<encode of component>/" for each one, then echoes the
#      accumulated value.
# Each component is base64-encoded before the for-loop word-splits the list
# and decoded again inside the loop body. For every component after the
# first, notes_mkdir_p is called with the id and the title path accumulated
# so far, i.e. for the ancestors of the current component; its output is sent
# to stderr so it does not end up in ID.
# NOTE(review): step 1 looks like the previous ID scheme left in next to the
# new one; with both present ID holds two lines. Confirm which is intended.
export ID="$(
echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}'
local sum="$pid/"
local title_so_far=""
for subtitle in $(echo "$i" | base64 --decode | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do
local subtitle="$(echo "$subtitle" | base64 --decode)"
if [ -n "$title_so_far" ]; then
notes_mkdir_p "${sum%/}" "${title_so_far%/}" >&2
fi
sum+="$(echo "$subtitle" | encode)/"
title_so_far+="$subtitle/"
done
echo "$sum"
)"
# PID is the same .id lookup as the local pid above, but exported.
export PID="$(
echo $json | jq -r .id
)"
# NOTE(review): two near-identical log lines follow; the first one may belong
# to the older version of this function. Confirm before relying on it.
log " $PID/$ID ($TITLE): ${#CONTENT}"
# Drop the trailing '/' left by the accumulation loop.
ID="${ID%/}"
log " $ID ($TITLE): ${#CONTENT}"
# NOTE(review): the accumulated part of ID already starts with "$pid/", so
# "$PID/$ID" names the parent id twice. Verify against push_crawled.
push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
}
for i in $(seq 1 $(("${#expanded[@]}"-1))); do