don't crawl crawled subfiles

master
Bel LaPointe 2022-02-10 11:13:04 -07:00
parent 6935303e6a
commit 9acaa4a356
2 changed files with 24 additions and 8 deletions

View File

@@ -3,7 +3,7 @@
main() {
	config
	log crawling ids...
-	for id in $(ids); do
+	for id in $(crawlable_ids); do
		crawl "$id"
	done
	log rewriting ids...
@@ -30,7 +30,28 @@ log() {
}
ids() {
-	notes ids
+	notes ids | sort
}
+crawlable_ids() {
+	local all_ids=($(ids))
+	local crawlable_ids=()
+	for id in "${all_ids[@]}"; do
+		if for crawlable_id in "${crawlable_ids[@]}"; do
+			if [ "$id" != "${id#$crawlable_id/}" ]; then
+				echo true
+			fi
+		done | grep -q true; then
+			continue
+		fi
+		local content="$(notes get "$id")"
+		if is_crawlable "$content"; then
+			crawlable_ids+=("$id")
+		fi
+	done
+	for crawlable_id in "${crawlable_ids[@]}"; do
+		echo "$crawlable_id"
+	done
+}
crawl() {
@@ -43,7 +64,6 @@ crawl() {
}
_crawl() {
log "crawling? $*"
local id="$1"
local content="$(notes get "$id")"
local json="$(
@@ -51,10 +71,6 @@ _crawl() {
"$(echo "$content" | jq -Rs)" \
"$id"
)"
if ! is_crawlable "$content"; then
log "not crawlable: '${content:0:20}'..."
return 0
fi
local crawlable_source="$(extract_crawlable_source "$content")"
for backend in gitlab; do
if $backend is "$crawlable_source"; then
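
The crawlable_ids helper added above skips any id that sits underneath an id already accepted as crawlable, so subfiles of a crawlable note are never crawled on their own. The check relies on bash prefix stripping: ${id#$crawlable_id/} removes a leading "<crawlable_id>/" if present, and a changed result means the id is nested. A minimal standalone sketch of that test, assuming ids are slash-separated paths (the under_crawlable name is illustrative, not part of this repository):

under_crawlable() {
	# Succeeds (returns 0) when $1 is nested under any of the remaining ids.
	local id="$1"; shift
	local crawlable_id
	for crawlable_id in "$@"; do
		# Strip a leading "<crawlable_id>/"; if anything was stripped, $id is a subfile.
		if [ "$id" != "${id#$crawlable_id/}" ]; then
			return 0
		fi
	done
	return 1
}

under_crawlable "notes/project/sub" "notes/project" && echo "skip"   # nested, would be skipped
under_crawlable "notes/other" "notes/project" || echo "crawl"        # not nested, would be kept

The approach depends on parents being seen before their subfiles, which is why ids now pipes through sort.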

View File

@@ -1,5 +1,5 @@
todo:
-- preview default via q param
- css
done:
- https://developer.mozilla.org/en-US/docs/Web/API/History/pushState#change_a_query_parameter
+- preview default via q param