master
Bel LaPointe 2022-02-10 08:35:42 -07:00
parent f190bdecca
commit 92226f9aea
1 changed file with 27 additions and 6 deletions

View File

@@ -41,7 +41,7 @@ crawl() {
} }
_crawl() { _crawl() {
log "crawling $*" log "crawling? $*"
local id="$1" local id="$1"
local content="$(notes get "$id")" local content="$(notes get "$id")"
local json="$( local json="$(
@@ -77,21 +77,42 @@ crawl_with() {
local expanded=($($backend expand "$crawlable_source")) local expanded=($($backend expand "$crawlable_source"))
log expand $crawlable_source: log expand $crawlable_source:
# notes_mkdir_p ID SUBTITLE
#
# Create a placeholder note at ID, titled SUBTITLE, unless one is already
# there. "Already there" is decided by `notes get ID`: if none of its output
# lines is exactly "404 page not found", nothing is written.
#
# Arguments:
#   $1 - note id to check and, if absent, create
#   $2 - title passed to `notes put` for the placeholder
# Returns:
#   0 when the note is left untouched; otherwise the status of `notes put`.
notes_mkdir_p() {
  local note_id="$1"
  local note_title="$2"
  # Only a literal 404 line counts as "missing"; anything else is kept as-is.
  if notes get "$note_id" | grep -q '^404 page not found$'; then
    notes put "$note_id" "$note_title" "autogenerated content"
  fi
}
# one I
#
# NOTE(review): this span is a side-by-side diff rendering. Lines that repeat
# the same statement twice show the old column followed by the new column;
# lines that appear once were added by the commit. The comments below describe
# what the visible statements do and flag points to confirm in the real file.
#
# Processes a single crawled entry: $1 is a base64-encoded path that is
# decoded into TITLE, its body is fetched into CONTENT through
# `$backend get "$crawlable_source" "$i"`, an ID is derived, and the result is
# handed to push_crawled. `backend`, `crawlable_source` and `json` are not
# assigned here; presumably they come from the enclosing scope -- confirm.
one() { one() {
# encode: reads stdin, base64-encodes it, takes the md5sum of that, and emits
# the first 10 characters with newlines removed.
# NOTE(review): after `cut -c 1-10` the `awk '{print $1}'` stage looks
# redundant (the first 10 characters of md5sum output should be hex only).
encode() {
base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n'
}
local i="$1" local i="$1"
# TITLE starts as the decoded form of $i.
export TITLE="$( export TITLE="$(
echo "$i" | base64 --decode echo "$i" | base64 --decode
)" )"
# Added line: keep only the text after the last '/' of the decoded path.
TITLE="${TITLE##*/}"
export CONTENT="$( export CONTENT="$(
$backend get "$crawlable_source" "$i" $backend get "$crawlable_source" "$i"
)" )"
# Added line: pid is the .id field that jq extracts from $json.
local pid="$(echo $json | jq -r .id)"
# ID derivation.
#   Old column: md5sum of the base64 of "$crawlable_source/$TITLE".
#   New column: starts from "$pid/" and walks the '/'-separated components of
#   the decoded $i. Each component is re-encoded with base64 so it can pass
#   through the for-loop's word splitting, then decoded again. Before a
#   component is appended (except the first), notes_mkdir_p is called for the
#   path built so far, with its output sent to stderr so it does not end up in
#   the captured ID. Each component then contributes "<encode(component)>/".
# NOTE(review): GNU base64 wraps its output at 76 columns by default; a long
# path component could therefore be split into several loop items by the
# unquoted $(...) -- confirm, and consider `base64 -w 0` if so.
export ID="$( export ID="$(
echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}' local sum="$pid/"
local title_so_far=""
for subtitle in $(echo "$i" | base64 --decode | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do
local subtitle="$(echo "$subtitle" | base64 --decode)"
if [ -n "$title_so_far" ]; then
notes_mkdir_p "${sum%/}" "${title_so_far%/}" >&2
fi
sum+="$(echo "$subtitle" | encode)/"
title_so_far+="$subtitle/"
done
echo "$sum"
)" )"
# Old column (next three lines): export PID from the .id field of $json.
# New column: strip the trailing '/' from ID and log "$ID" without a PID prefix.
export PID="$( ID="${ID%/}"
echo $json | jq -r .id log " $ID ($TITLE): ${#CONTENT}"
)"
log " $PID/$ID ($TITLE): ${#CONTENT}"
# NOTE(review): this call is shown unchanged in both columns and still passes
# "$PID/$ID", but the new column no longer assigns PID in this function and
# its ID already begins with "$pid/". Confirm PID is set elsewhere, or that
# the first argument should now be just "$ID".
push_crawled "$PID/$ID" "$TITLE" "$CONTENT" push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
} }
for i in $(seq 1 $(("${#expanded[@]}"-1))); do for i in $(seq 1 $(("${#expanded[@]}"-1))); do