diff --git a/app/crawler/cache.sh b/app/crawler/cache.sh new file mode 100644 index 0000000..52e6dc9 --- /dev/null +++ b/app/crawler/cache.sh @@ -0,0 +1,28 @@ +#! /bin/bash + +cache() ( + path() { + echo "$CACHE/$(echo "$*" | base64)" + } + get() { + local path="$(path "$*")" + if ! [ -f "$path" ]; then + return 1 + fi + if wc -c "$path" | grep -q '^[ ]*0[ ]*$'; then + return 1 + fi + local created="$(date -r "$path" +%s)" + local now="$(date +%s)" + if ((now-created > CACHE_DURATION)); then + return 1 + fi + cat "$path" + } + put() { + local path="$(path "$*")" + tee "$path" + } + "$@" +) + diff --git a/app/crawler/gitlab.sh b/app/crawler/gitlab.sh new file mode 100644 index 0000000..d36815a --- /dev/null +++ b/app/crawler/gitlab.sh @@ -0,0 +1,105 @@ +#! /bin/bash + +gitlab() ( + is() { + echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wiki/' + } + + _gcurl() { + local cache_key="gitlab _gcurl $*" + if cache get "$cache_key"; then + return 0 + fi + __gcurl "$@" | cache put "$cache_key" + } + + __gcurl() { + curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@" + } + + expand() { + local cache_key="gitlab expand $*" + if cache get "$cache_key"; then + return 0 + fi + _expand "$@" | sort | cache put "$cache_key" + } + + _expand() { + local url="$1" + local project="$(_url_to_project_root "$url" | head -n 1)" + local root="$(_url_to_project_root "$url" | tail -n 1)" + __expand "$project" "$root" + } + + _url_to_project_root() { + local url="$1" + local url_path="${url#http*://gitlab*.net/}" + local project="${url_path%%/-/*}" + local root="${url_path#*/-/}" + local root="${root#tree/}" + local root="${root#blob/}" + local root="${root#*/}" + echo "$project" + echo "$root" + } + + __expand() { + local project="$1" + local root="${2:-"/"}" + + local b64_files=() + local b64_trees=("$root") + local i=0 + + find_each() { + local type="$1" + shift + echo "$*" \ + | jq -c .[] \ + | grep "\"type\":\"$type\"" \ + | jq -r .path \ + | while read -r line; do echo "$line" | base64; done \ + | grep . + } + while [ "$i" -lt "${#b64_trees[@]}" ]; do + got="$(_list_tree "$project" "${b64_trees[i]}")" + for b64_tree in $(find_each "tree" "$got"); do + if ! echo "${b64_trees[@]}" | grep -q "[ ^]$b64_tree[ $]"; then + b64_trees+=("$b64_tree") + fi + done + for b64_file in $(find_each "blob" "$got"); do + if ! echo "${b64_files[@]}" | grep -q "[ ^]$b64_file[ $]"; then + b64_files+=("$b64_file") + fi + done + i=$((i+1)) + done + for b64_file in "${b64_files[@]}"; do + echo "$b64_file" | base64 --decode + done + } + + _list_tree() { + local project="$(urlencode "$1")" + local path="/api/v4/projects/$project/repository/tree" + local query="recursive=true&path=$2" + _gcurl "https://gitlab-app.eng.qops.net/$path?$query" + } + + "$@" +) + +urlencode() ( + LC_COLLATE=C + local length="${#1}" + for (( i = 0; i < length; i++ )); do + local c="${1:$i:1}" + case $c in + [a-zA-Z0-9.~_-]) printf '%s' "$c" ;; + *) printf '%%%02X' "'$c" ;; + esac + done +) + diff --git a/app/crawler/main.sh b/app/crawler/main.sh index c3b1415..78546bf 100644 --- a/app/crawler/main.sh +++ b/app/crawler/main.sh @@ -18,6 +18,7 @@ config() { export CRAWL_INTERVAL=$((60*5)) export NOTEA_ADDR="${NOTEA_ADDR:-"http://localhost:3000"}" export GITLAB_PAT="$GITLAB_PAT" + source ./gitlab.sh } log() { @@ -83,162 +84,6 @@ rewrite() { return 1 } -notea() ( - ncurl() { - curl -sS "$@" - } - - ids() { - ncurl $NOTEA_ADDR/api/tree \ - | jq -r '.items \ - | to_entries[].value.id' \ - | grep -v '^root$' - } - - get() { - local cache_key="notea cache $1" - if cache get "$cache_key"; then - return 0 - fi - _get "$@" | cache put "$cache_key" - } - - _get() { - ncurl $NOTEA_ADDR/api/notes/$1 - } - - "$@" -) - -cache() ( - path() { - echo "$CACHE/$(echo "$*" | base64)" - } - get() { - local path="$(path "$*")" - if ! [ -f "$path" ]; then - return 1 - fi - if wc -c "$path" | grep -q '^[ ]*0[ ]*$'; then - return 1 - fi - local created="$(date -r "$path" +%s)" - local now="$(date +%s)" - if ((now-created > CACHE_DURATION)); then - return 1 - fi - cat "$path" - } - put() { - local path="$(path "$*")" - tee "$path" - } - "$@" -) - -gitlab() ( - is() { - echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wiki/' - } - - _gcurl() { - local cache_key="gitlab _gcurl $*" - if cache get "$cache_key"; then - return 0 - fi - __gcurl "$@" | cache put "$cache_key" - } - - __gcurl() { - curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@" - } - - expand() { - local cache_key="gitlab expand $*" - if cache get "$cache_key"; then - return 0 - fi - _expand "$@" | sort | cache put "$cache_key" - } - - _expand() { - local url="$1" - local project="$(_url_to_project_root "$url" | head -n 1)" - local root="$(_url_to_project_root "$url" | tail -n 1)" - __expand "$project" "$root" - } - - _url_to_project_root() { - local url="$1" - local url_path="${url#http*://gitlab*.net/}" - local project="${url_path%%/-/*}" - local root="${url_path#*/-/}" - local root="${root#tree/}" - local root="${root#blob/}" - local root="${root#*/}" - echo "$project" - echo "$root" - } - - __expand() { - local project="$1" - local root="${2:-"/"}" - - local b64_files=() - local b64_trees=("$root") - local i=0 - - find_each() { - local type="$1" - shift - echo "$*" \ - | jq -c .[] \ - | grep "\"type\":\"$type\"" \ - | jq -r .path \ - | while read -r line; do echo "$line" | base64; done \ - | grep . - } - while [ "$i" -lt "${#b64_trees[@]}" ]; do - got="$(_list_tree "$project" "${b64_trees[i]}")" - for b64_tree in $(find_each "tree" "$got"); do - if ! echo "${b64_trees[@]}" | grep -q "[ ^]$b64_tree[ $]"; then - b64_trees+=("$b64_tree") - fi - done - for b64_file in $(find_each "blob" "$got"); do - if ! echo "${b64_files[@]}" | grep -q "[ ^]$b64_file[ $]"; then - b64_files+=("$b64_file") - fi - done - i=$((i+1)) - done - for b64_file in "${b64_files[@]}"; do - echo "$b64_file" | base64 --decode - done - } - - _list_tree() { - local project="$(urlencode "$1")" - local path="/api/v4/projects/$project/repository/tree" - local query="recursive=true&path=$2" - _gcurl "https://gitlab-app.eng.qops.net/$path?$query" - } - - "$@" -) - -urlencode() ( - LC_COLLATE=C - local length="${#1}" - for (( i = 0; i < length; i++ )); do - local c="${1:$i:1}" - case $c in - [a-zA-Z0-9.~_-]) printf '%s' "$c" ;; - *) printf '%%%02X' "'$c" ;; - esac - done -) - if [ "$0" == "$BASH_SOURCE" ]; then main "$@" fi diff --git a/app/crawler/notea.sh b/app/crawler/notea.sh new file mode 100644 index 0000000..a91d969 --- /dev/null +++ b/app/crawler/notea.sh @@ -0,0 +1,29 @@ +#! /bin/bash + +notea() ( + ncurl() { + curl -sS "$@" + } + + ids() { + ncurl $NOTEA_ADDR/api/tree \ + | jq -r '.items \ + | to_entries[].value.id' \ + | grep -v '^root$' + } + + get() { + local cache_key="notea cache $1" + if cache get "$cache_key"; then + return 0 + fi + _get "$@" | cache put "$cache_key" + } + + _get() { + ncurl $NOTEA_ADDR/api/notes/$1 + } + + "$@" +) +