From 6c3229eeb310eff5d229c0bd7ddb83c0bfdf7852 Mon Sep 17 00:00:00 2001 From: Bel LaPointe Date: Tue, 1 Feb 2022 08:33:55 -0700 Subject: [PATCH] ok i need tests now --- app/crawler/gitlab.sh | 32 +++++++++++++++++++++----------- app/crawler/main.sh | 16 +++++++++++++++- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/app/crawler/gitlab.sh b/app/crawler/gitlab.sh index d36815a..854da59 100644 --- a/app/crawler/gitlab.sh +++ b/app/crawler/gitlab.sh @@ -5,16 +5,13 @@ gitlab() ( echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wiki/' } - _gcurl() { - local cache_key="gitlab _gcurl $*" - if cache get "$cache_key"; then - return 0 - fi - __gcurl "$@" | cache put "$cache_key" - } - - __gcurl() { - curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@" + get() { + local project="$1" + local blob="$2" + project="$(urlencode "$project")" + blob="$(urlencode "$blob")" + local path="api/v4/projects/$project/repository/files/$blob/raw" + _gcurl "https://gitlab-app.eng.qops.net/$path" } expand() { @@ -76,8 +73,9 @@ gitlab() ( done i=$((i+1)) done + echo "$project" | base64 for b64_file in "${b64_files[@]}"; do - echo "$b64_file" | base64 --decode + echo "$b64_file" done } @@ -88,6 +86,18 @@ gitlab() ( _gcurl "https://gitlab-app.eng.qops.net/$path?$query" } + _gcurl() { + local cache_key="gitlab _gcurl $*" + if cache get "$cache_key"; then + return 0 + fi + __gcurl "$@" | cache put "$cache_key" + } + + __gcurl() { + curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@" + } + "$@" ) diff --git a/app/crawler/main.sh b/app/crawler/main.sh index 78546bf..ea232df 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -49,7 +49,7 @@ crawl() { if ! is_crawlable "$content"; then return 0 fi - local crawlable_source="$(echo "$content" | head -n 1 | awk '{print $NF}')" + local crawlable_source="$(extract_crawlable_source "$content")" for backend in gitlab; do if $backend is "$crawlable_source"; then crawl_with $backend "$json" @@ -60,9 +60,23 @@ crawl() { return 1 } +extract_crawlable_source() { + echo "$*" | head -n 1 | awk '{print $NF}' +} + crawl_with() { local backend="$1" local json="$2" + + local content="$(echo "$json" | jq -r .content)" + local crawlable_source="$(extract_crawlable_source "$content")" + + local expanded=("$($backend expand "$crawlable_source")") + local context="$expanded" + for i in $(seq 1 "${#expanded[@]}"); do + log expand $context, ${expanded[i]} + done + log not impl crawl with return 1 }