From 2c8abf7158f2751b8c9a1561b7fb5bd2ff5db14b Mon Sep 17 00:00:00 2001
From: Bel LaPointe
Date: Tue, 1 Feb 2022 11:30:15 -0700
Subject: [PATCH] gr

---
 app/crawler/gitlab.sh      | 13 ++++++++++---
 app/crawler/gitlab_test.sh | 24 +++++++++++++++++++++---
 app/crawler/main.sh        |  6 +++---
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/app/crawler/gitlab.sh b/app/crawler/gitlab.sh
index b193114..e69916a 100644
--- a/app/crawler/gitlab.sh
+++ b/app/crawler/gitlab.sh
@@ -6,10 +6,14 @@ gitlab() (
     }
 
     get() {
-        local project="$1"
+        local url="$1"
         local blob="$2"
+
+        local project="$(_url_to_project_root "$url" | head -n 1)"
         project="$(urlencode "$project")"
-        blob="$(urlencode "$blob")"
+        local root="$(_url_to_project_root "$url" | tail -n 1)"
+        blob="$(urlencode "$root/$blob")"
+
         local path="api/v4/projects/$project/repository/files/$blob/raw"
         _gcurl "https://gitlab-app.eng.qops.net/$path"
     }
@@ -75,7 +79,10 @@
             i=$((i+1))
         done
         for b64_file in "${b64_files[@]}"; do
-            echo "$b64_file"
+            local file="$(echo "$b64_file" | base64 --decode)"
+            file="${file#$root}"
+            file="${file#/}"
+            echo "$file" | base64
         done
     }
 
diff --git a/app/crawler/gitlab_test.sh b/app/crawler/gitlab_test.sh
index 2c9eeb9..4d89dbf 100644
--- a/app/crawler/gitlab_test.sh
+++ b/app/crawler/gitlab_test.sh
@@ -20,9 +20,27 @@ test___expand() {
         ;;
         esac
     }
-    ! __expand project dir | grep -q ^$(echo project | base64)$ || return 1
-    __expand project dir | grep -q ^$(echo dir/blob | base64)$ || return 2
-    __expand project dir | grep -q ^$(echo dir/dir2/blob2 | base64)$ || return 3
+    local blob=false
+    local dir2blob2=false
+    local others=0
+    for result in \$(__expand project dir); do
+        if echo \$result | base64 --decode | grep -q ^blob$; then
+            blob=true
+        elif echo \$result | base64 --decode | grep -q ^dir2.blob2$; then
+            dir2blob2=true
+        else
+            others=\$((others+1))
+        fi
+    done
+    if [ \$others != 0 ]; then
+        return 101
+    fi
+    if ! \$blob; then
+        return 102
+    fi
+    if ! \$dir2blob2; then
+        return 103
+    fi
 EOF
 )"
 }
diff --git a/app/crawler/main.sh b/app/crawler/main.sh
index 912ad92..d1a1d8c 100644
--- a/app/crawler/main.sh
+++ b/app/crawler/main.sh
@@ -24,7 +24,7 @@ config() {
 }
 
 log() {
-    echo "$(date)> $*" >&2
+    echo "$(date +%H:%M:%S)> $*" >&2
 }
 
 ids() {
@@ -71,9 +71,9 @@ crawl_with() {
     local crawlable_source="$(extract_crawlable_source "$content")"
     local expanded=($($backend expand "$crawlable_source"))
-    local context="$expanded"
+    log expand $crawlable_source:
     for i in $(seq 1 "${#expanded[@]}"); do
-        log expand $(echo $context | base64 --decode), $(echo ${expanded[i]} | base64 --decode)
+        log "    $(echo ${expanded[i]} | base64 --decode)"
     done
     log not impl crawl with