From fa9aafcd2893089f61e0b61c1882fb4d6e9701c6 Mon Sep 17 00:00:00 2001
From: Bel LaPointe
Date: Thu, 10 Feb 2022 08:52:40 -0700
Subject: [PATCH] simplify mkdir all notes

---
 app/crawler/main.sh  | 13 ++++++++-----
 app/crawler/notes.sh |  7 +++++++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/app/crawler/main.sh b/app/crawler/main.sh
index a780b45..ad6991b 100644
--- a/app/crawler/main.sh
+++ b/app/crawler/main.sh
@@ -71,18 +71,22 @@ extract_crawlable_source() {
 crawl_with() {
   local backend="$1"
   local json="$2"
+  local pid="$(echo "$json" | jq -r .id)"
   local content="$(echo "$json" | jq -r .content)"
   local crawlable_source="$(extract_crawlable_source "$content")"
   local expanded=($($backend expand "$crawlable_source"))
+
+  log purge $crawlable_source:
+  for subid in $(notes ids | grep "^$pid/"); do
+    notes del "$subid"
+  done
+
   log expand $crawlable_source:
 
   notes_mkdir_p() {
     local id="$1"
     local subtitle="$2"
-    if ! notes get "$id" | grep -q '^404 page not found$'; then
-      return
-    fi
     notes put "$id" "$subtitle" "autogenerated content"
   }
 
   one() {
@@ -97,7 +101,6 @@ crawl_with() {
     export CONTENT="$(
       $backend get "$crawlable_source" "$i"
     )"
-    local pid="$(echo $json | jq -r .id)"
     export ID="$(
       local sum="$pid/"
       local title_so_far=""
@@ -128,7 +131,7 @@ is_crawlable() {
   local crawlable_source="$(extract_crawlable_source "$*")"
   # https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
   local url_pattern="(http|https)://[a-zA-Z0-9./?=_%:-]*"
-  echo "$crawlable_source" | grep -q -E "^[ ]*$url_pattern[ ]*$"
+  echo "$crawlable_source" | cut -c 1-300 | grep -q -E "^[ ]*$url_pattern[ ]*$"
 }
 
 rewrite() {
diff --git a/app/crawler/notes.sh b/app/crawler/notes.sh
index 0972ba8..cb39a49 100644
--- a/app/crawler/notes.sh
+++ b/app/crawler/notes.sh
@@ -69,6 +69,13 @@ notes() (
     _nncurl $NOTES_ADDR/api/v0/files/$1
   }
 
+  del() {
+    local id="$1"
+    _nncurl \
+      -X DELETE \
+      $NOTES_ADDR/api/v0/files/$id
+  }
+
   put() {
     set -u
     local ret=0
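
A minimal sketch of the purge-then-expand flow this patch introduces, assuming
notes.sh is sourced and a notes server is reachable at $NOTES_ADDR; the parent
id "docs" below is hypothetical:

  . app/crawler/notes.sh

  pid="docs"  # hypothetical parent note id
  # Subnote ids live under the "$pid/" prefix, so a prefix grep over
  # `notes ids` selects exactly the autogenerated children to purge.
  for subid in $(notes ids | grep "^$pid/"); do
    notes del "$subid"  # new helper: DELETE $NOTES_ADDR/api/v0/files/$subid
  done
  # With stale children purged up front, notes_mkdir_p can put
  # unconditionally instead of probing for "404 page not found" first.
  notes put "$pid/sub" "sub" "autogenerated content"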
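
The new cut guard in is_crawlable bounds the regex scan to the first 300
characters of the extracted source, so very large note bodies stay cheap to
test; a sketch with hypothetical inputs:

  url_pattern="(http|https)://[a-zA-Z0-9./?=_%:-]*"
  # Whole line is a URL: crawlable.
  echo "https://example.com/a?b=c" | cut -c 1-300 | grep -q -E "^[ ]*$url_pattern[ ]*$" && echo crawlable
  # URL embedded in prose: not crawlable.
  echo "see https://example.com inline" | cut -c 1-300 | grep -q -E "^[ ]*$url_pattern[ ]*$" || echo not crawlable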