diff --git a/app/crawler/gitlab.sh b/app/crawler/gitlab.sh index d125ec3..b193114 100644 --- a/app/crawler/gitlab.sh +++ b/app/crawler/gitlab.sh @@ -74,7 +74,6 @@ gitlab() ( done i=$((i+1)) done - echo "$project" | base64 for b64_file in "${b64_files[@]}"; do echo "$b64_file" done diff --git a/app/crawler/gitlab_test.sh b/app/crawler/gitlab_test.sh index 5d10c9c..2c9eeb9 100644 --- a/app/crawler/gitlab_test.sh +++ b/app/crawler/gitlab_test.sh @@ -20,9 +20,9 @@ test___expand() { ;; esac } - __expand project dir | grep -q ^$(echo project | base64)$ - __expand project dir | grep -q ^$(echo dir/blob | base64)$ - __expand project dir | grep -q ^$(echo dir/dir2/blob2 | base64)$ + ! __expand project dir | grep -q ^$(echo project | base64)$ || return 1 + __expand project dir | grep -q ^$(echo dir/blob | base64)$ || return 2 + __expand project dir | grep -q ^$(echo dir/dir2/blob2 | base64)$ || return 3 EOF )" } diff --git a/app/crawler/main.sh b/app/crawler/main.sh index 725527a..912ad92 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -4,6 +4,8 @@ main() { config for id in $(ids); do crawl "$id" + done + for id in $(ids); do rewrite "$id" done } @@ -12,6 +14,7 @@ config() { set -o pipefail set -e export CACHE="${CACHE:-"$(mktemp -d)"}" + mkdir -p "$CACHE" export CACHE_DURATION=$((60*5)) export NOTEA_ADDR="${NOTEA_ADDR:-"http://localhost:3000"}" export GITLAB_PAT="$GITLAB_PAT" @@ -37,10 +40,12 @@ crawl() { } _crawl() { + log crawling $* local id="$1" local json="$(notea get "$id")" local content="$(echo "$json" | jq -r .content)" if ! is_crawlable "$content"; then + log $content is not crawlable return 0 fi local crawlable_source="$(extract_crawlable_source "$content")" @@ -65,10 +70,10 @@ crawl_with() { local content="$(echo "$json" | jq -r .content)" local crawlable_source="$(extract_crawlable_source "$content")" - local expanded=("$($backend expand "$crawlable_source")") + local expanded=($($backend expand "$crawlable_source")) local context="$expanded" for i in $(seq 1 "${#expanded[@]}"); do - log expand $context, ${expanded[i]} + log expand $(echo $context | base64 --decode), $(echo ${expanded[i]} | base64 --decode) done log not impl crawl with diff --git a/app/crawler/notea.sh b/app/crawler/notea.sh index 33c77e6..e034524 100644 --- a/app/crawler/notea.sh +++ b/app/crawler/notea.sh @@ -7,7 +7,9 @@ notea() ( ids() { ncurl $NOTEA_ADDR/api/tree \ - | jq -r '.items | to_entries[].value.id' \ + | jq '.items | to_entries[].value.id' \ + | grep -v '^null$' \ + | jq -r . \ | grep -v '^root$' } diff --git a/app/crawler/notea_test.sh b/app/crawler/notea_test.sh index 1a3148e..f119886 100644 --- a/app/crawler/notea_test.sh +++ b/app/crawler/notea_test.sh @@ -18,8 +18,8 @@ test_ids() { } }' } - ids | wc -l | grep -q 1 - ids | grep -q def + ids | wc -l | grep -q 1 || return 101 + ids | grep -q def || return 102 EOF )" } @@ -30,10 +30,10 @@ test_get() { echo "$*" | grep -q \/api\/notes\/abc echo 'asdf' } - ! cache get "notea cache abc" | grep -q asdf - get abc | wc -l | grep -q 1 - get abc | grep -q asdf - cache get "notea cache abc" | grep -q asdf + ! cache get "notea cache abc" | grep -q asdf || return 101 + get abc | wc -l | grep -q 1 || return 102 + get abc | grep -q asdf || return 103 + cache get "notea cache abc" | grep -q asdf || return 104 EOF )" } diff --git a/app/crawler/test.sh b/app/crawler/test.sh index a02a92c..62e44dd 100644 --- a/app/crawler/test.sh +++ b/app/crawler/test.sh @@ -18,8 +18,10 @@ one_main() ( local f="$1" local ret=0 for t in $(grep ^test_ "$f" | sed 's/(.*//'); do - if ! one_test "$f" "$t"; then - echo failed $f:$t >&2 + one_test "$f" "$t" + local test_ret=$? + if [ $test_ret != 0 ]; then + echo failed $f:$t: $test_ret >&2 ret=$((ret+1)) fi done @@ -37,6 +39,8 @@ one_test() ( each() { export CACHE=$(mktemp -d) + export GITLAB_PAT=gibberish + export NOTEA_ADDR=http://127.0.0.1:61111 source ./cache.sh set -e set -o pipefail