split main into many files

master
Bel LaPointe 2022-02-01 08:26:21 -07:00
parent 3d9cfc2f95
commit 3c34670b4f
4 changed files with 163 additions and 156 deletions

28
app/crawler/cache.sh Normal file
View File

@ -0,0 +1,28 @@
#! /bin/bash
# cache SUBCOMMAND [KEY...]
#
# Small file-backed cache. Each key maps to one file directly under "$CACHE";
# an entry is a hit only while it is non-empty and no older than
# CACHE_DURATION seconds. The body is a subshell, so the helpers below are
# reachable only through the dispatcher on the last line, e.g.
# `cache get some key` or `producer | cache put some key`.
#
# Globals (read): CACHE, CACHE_DURATION
cache() (
  # Print the file that backs the key made of all arguments.
  path() {
    # Raw base64 output wraps onto several lines for long keys and may contain
    # '/', which would point into a sub-directory that does not exist. Join
    # the lines and map '/' to '_' ('_' is not in the base64 alphabet, so two
    # different keys can never collide).
    echo "$CACHE/$(echo "$*" | base64 | tr -d '\n' | tr '/' '_')"
  }
  # Write the cached value to stdout and return 0, or return 1 on a miss.
  get() {
    local path
    path="$(path "$*")"
    if ! [ -f "$path" ]; then
      return 1
    fi
    # `wc -c FILE` prints the file name after the count, so matching its
    # output against "only a zero" never succeeds; -s tests emptiness directly.
    if ! [ -s "$path" ]; then
      return 1
    fi
    local created now
    created="$(date -r "$path" +%s)" || return 1
    now="$(date +%s)"
    if ((now - created > CACHE_DURATION)); then
      return 1
    fi
    cat "$path"
  }
  # Store stdin under the key while also passing it through to stdout.
  put() {
    local path
    path="$(path "$*")"
    tee "$path"
  }
  "$@"
)

105
app/crawler/gitlab.sh Normal file
View File

@ -0,0 +1,105 @@
#! /bin/bash
# gitlab SUBCOMMAND [ARGS...]
#
# Helpers for listing the files of a GitLab project through the REST API.
# The body is a subshell, so the functions below are reachable only through
# the dispatcher on the last line, e.g. `gitlab is URL` or `gitlab expand URL`.
#
# Globals (read): GITLAB_PAT
# Uses `cache` and `urlencode`, which are defined outside this function.
gitlab() (
  # Succeeds when the text matches the regex gitlab.app (the dot matches any
  # character, so "gitlab-app" qualifies) and does not contain "/wiki/".
  is() {
    echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wiki/'
  }
  # Cached variant of __gcurl; the cache key is the full argument list.
  _gcurl() {
    local cache_key="gitlab _gcurl $*"
    if cache get "$cache_key"; then
      return 0
    fi
    __gcurl "$@" | cache put "$cache_key"
  }
  # curl with the bearer token from GITLAB_PAT attached.
  __gcurl() {
    curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@"
  }
  # Print the sorted file paths found below the given URL (cached).
  expand() {
    local cache_key="gitlab expand $*"
    if cache get "$cache_key"; then
      return 0
    fi
    _expand "$@" | sort | cache put "$cache_key"
  }
  _expand() {
    local url="$1"
    local project root
    # _url_to_project_root always prints exactly two lines; the second one is
    # empty when the URL names no sub-directory.
    project="$(_url_to_project_root "$url" | head -n 1)"
    root="$(_url_to_project_root "$url" | tail -n 1)"
    __expand "$project" "$root"
  }
  # Print two lines: the project path, then the directory inside the
  # repository that the URL points at (empty for the repository root).
  _url_to_project_root() {
    local url="$1"
    local url_path="${url#http*://gitlab*.net/}"
    local project="${url_path%%/-/*}"
    project="${project%/}"
    local root=""
    case "$url_path" in
      */-/*)
        root="${url_path#*/-/}"
        root="${root#tree/}"
        root="${root#blob/}"
        # What remains is "<ref>/<path>". With nothing after the ref, the
        # URL addresses the repository root, not a directory named like the
        # ref. A ref that itself contains '/' cannot be told apart here.
        case "$root" in
          */*) root="${root#*/}" ;;
          *) root="" ;;
        esac
        ;;
    esac
    echo "$project"
    echo "$root"
  }
  # Walk the tree below ROOT ($2, default "/") of PROJECT ($1) and print
  # every blob path once, one per line.
  __expand() {
    local project="$1"
    local root="${2:-"/"}"
    local b64_files=()
    local b64_trees=()
    local i=0
    local got tree_path b64_tree b64_file
    # Paths travel as single-line base64 tokens so that the word-splitting
    # `for` loops below survive spaces and long names.
    b64() {
      base64 | tr -d '\n'
      echo
    }
    # Succeeds when $1 equals one of the remaining arguments.
    seen() {
      local needle="$1"
      shift
      local have
      for have in "$@"; do
        if [ "$have" = "$needle" ]; then
          return 0
        fi
      done
      return 1
    }
    # Print, base64-encoded, the .path of every listing entry of the given type.
    find_each() {
      local type="$1"
      shift
      # The '?' forms keep jq quiet when the response is not an array of
      # objects (for example an error object).
      printf '%s\n' "$*" \
        | jq -r --arg want "$type" '.[]? | select(.type? == $want) | .path' \
        | while IFS= read -r line; do echo "$line" | b64; done \
        | grep .
    }
    # The work list holds base64 tokens only, the seed included, and every
    # token is decoded again before it is sent to the API.
    b64_trees=("$(echo "$root" | b64)")
    while [ "$i" -lt "${#b64_trees[@]}" ]; do
      tree_path="$(echo "${b64_trees[i]}" | base64 --decode)"
      got="$(_list_tree "$project" "$tree_path")"
      for b64_tree in $(find_each "tree" "$got"); do
        if ! seen "$b64_tree" "${b64_trees[@]}"; then
          b64_trees+=("$b64_tree")
        fi
      done
      for b64_file in $(find_each "blob" "$got"); do
        if ! seen "$b64_file" "${b64_files[@]}"; then
          b64_files+=("$b64_file")
        fi
      done
      i=$((i+1))
    done
    for b64_file in "${b64_files[@]}"; do
      echo "$b64_file" | base64 --decode
    done
  }
  # Fetch the recursive tree listing of PROJECT ($1) below PATH ($2).
  # NOTE(review): only a single response is read; if the endpoint paginates,
  # later pages are not fetched - confirm against the API documentation.
  _list_tree() {
    local project
    project="$(urlencode "$1")"
    local path="api/v4/projects/$project/repository/tree"
    # The directory is user-controlled text, so it is encoded like the project.
    local query
    query="recursive=true&path=$(urlencode "$2")"
    _gcurl "https://gitlab-app.eng.qops.net/$path?$query"
  }
  "$@"
)
# urlencode STRING
#
# Print STRING percent-encoded, without a trailing newline. The characters
# A-Z a-z 0-9 . ~ _ - pass through unchanged; everything else becomes %XX.
urlencode() (
  # The body is a subshell, so this override never reaches the caller.
  # LC_ALL rather than LC_COLLATE alone: it takes precedence over an LC_ALL
  # inherited from the environment, and it makes bash measure and index the
  # string byte by byte, so a multibyte character is emitted as its
  # individual bytes instead of as one code point.
  LC_ALL=C
  local length="${#1}"
  local i c
  for ((i = 0; i < length; i++)); do
    c="${1:i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      # A leading quote makes printf use the numeric value of the character.
      *) printf '%%%02X' "'$c" ;;
    esac
  done
)

View File

@ -18,6 +18,7 @@ config() {
export CRAWL_INTERVAL=$((60*5)) export CRAWL_INTERVAL=$((60*5))
export NOTEA_ADDR="${NOTEA_ADDR:-"http://localhost:3000"}" export NOTEA_ADDR="${NOTEA_ADDR:-"http://localhost:3000"}"
export GITLAB_PAT="$GITLAB_PAT" export GITLAB_PAT="$GITLAB_PAT"
source ./gitlab.sh
} }
log() { log() {
@ -83,162 +84,6 @@ rewrite() {
return 1 return 1
} }
# notea SUBCOMMAND [ARGS...]
#
# Thin client for the HTTP API served at "$NOTEA_ADDR". The body is a
# subshell, so the helpers below are reachable only through the dispatcher on
# the last line, e.g. `notea ids` or `notea get ID`.
#
# Globals (read): NOTEA_ADDR
# Uses `cache`, which is defined outside this function.
notea() (
  # curl with the flags shared by every request.
  ncurl() {
    curl -sS "$@"
  }
  # Print the id of every entry under .items of the /api/tree response,
  # except the one that is exactly "root".
  ids() {
    # The jq program stays on one line: inside single quotes a backslash at
    # the end of a line is handed to jq literally instead of continuing it.
    ncurl "$NOTEA_ADDR/api/tree" \
      | jq -r '.items | to_entries[].value.id' \
      | grep -v '^root$'
  }
  # Print the note with id $1, served from the cache when possible.
  get() {
    local cache_key="notea cache $1"
    if cache get "$cache_key"; then
      return 0
    fi
    _get "$@" | cache put "$cache_key"
  }
  # Fetch the note with id $1 straight from the server.
  _get() {
    # Quoted so the URL always reaches curl as a single argument.
    ncurl "$NOTEA_ADDR/api/notes/$1"
  }
  "$@"
)
# cache SUBCOMMAND [KEY...]
#
# Small file-backed cache. Each key maps to one file directly under "$CACHE";
# an entry is a hit only while it is non-empty and no older than
# CACHE_DURATION seconds. The body is a subshell, so the helpers below are
# reachable only through the dispatcher on the last line, e.g.
# `cache get some key` or `producer | cache put some key`.
#
# Globals (read): CACHE, CACHE_DURATION
cache() (
  # Print the file that backs the key made of all arguments.
  path() {
    # Raw base64 output wraps onto several lines for long keys and may contain
    # '/', which would point into a sub-directory that does not exist. Join
    # the lines and map '/' to '_' ('_' is not in the base64 alphabet, so two
    # different keys can never collide).
    echo "$CACHE/$(echo "$*" | base64 | tr -d '\n' | tr '/' '_')"
  }
  # Write the cached value to stdout and return 0, or return 1 on a miss.
  get() {
    local path
    path="$(path "$*")"
    if ! [ -f "$path" ]; then
      return 1
    fi
    # `wc -c FILE` prints the file name after the count, so matching its
    # output against "only a zero" never succeeds; -s tests emptiness directly.
    if ! [ -s "$path" ]; then
      return 1
    fi
    local created now
    created="$(date -r "$path" +%s)" || return 1
    now="$(date +%s)"
    if ((now - created > CACHE_DURATION)); then
      return 1
    fi
    cat "$path"
  }
  # Store stdin under the key while also passing it through to stdout.
  put() {
    local path
    path="$(path "$*")"
    tee "$path"
  }
  "$@"
)
# gitlab SUBCOMMAND [ARGS...]
#
# Helpers for listing the files of a GitLab project through the REST API.
# The body is a subshell, so the functions below are reachable only through
# the dispatcher on the last line, e.g. `gitlab is URL` or `gitlab expand URL`.
#
# Globals (read): GITLAB_PAT
# Uses `cache` and `urlencode`, which are defined outside this function.
gitlab() (
  # Succeeds when the text matches the regex gitlab.app (the dot matches any
  # character, so "gitlab-app" qualifies) and does not contain "/wiki/".
  is() {
    echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wiki/'
  }
  # Cached variant of __gcurl; the cache key is the full argument list.
  _gcurl() {
    local cache_key="gitlab _gcurl $*"
    if cache get "$cache_key"; then
      return 0
    fi
    __gcurl "$@" | cache put "$cache_key"
  }
  # curl with the bearer token from GITLAB_PAT attached.
  __gcurl() {
    curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@"
  }
  # Print the sorted file paths found below the given URL (cached).
  expand() {
    local cache_key="gitlab expand $*"
    if cache get "$cache_key"; then
      return 0
    fi
    _expand "$@" | sort | cache put "$cache_key"
  }
  _expand() {
    local url="$1"
    local project root
    # _url_to_project_root always prints exactly two lines; the second one is
    # empty when the URL names no sub-directory.
    project="$(_url_to_project_root "$url" | head -n 1)"
    root="$(_url_to_project_root "$url" | tail -n 1)"
    __expand "$project" "$root"
  }
  # Print two lines: the project path, then the directory inside the
  # repository that the URL points at (empty for the repository root).
  _url_to_project_root() {
    local url="$1"
    local url_path="${url#http*://gitlab*.net/}"
    local project="${url_path%%/-/*}"
    project="${project%/}"
    local root=""
    case "$url_path" in
      */-/*)
        root="${url_path#*/-/}"
        root="${root#tree/}"
        root="${root#blob/}"
        # What remains is "<ref>/<path>". With nothing after the ref, the
        # URL addresses the repository root, not a directory named like the
        # ref. A ref that itself contains '/' cannot be told apart here.
        case "$root" in
          */*) root="${root#*/}" ;;
          *) root="" ;;
        esac
        ;;
    esac
    echo "$project"
    echo "$root"
  }
  # Walk the tree below ROOT ($2, default "/") of PROJECT ($1) and print
  # every blob path once, one per line.
  __expand() {
    local project="$1"
    local root="${2:-"/"}"
    local b64_files=()
    local b64_trees=()
    local i=0
    local got tree_path b64_tree b64_file
    # Paths travel as single-line base64 tokens so that the word-splitting
    # `for` loops below survive spaces and long names.
    b64() {
      base64 | tr -d '\n'
      echo
    }
    # Succeeds when $1 equals one of the remaining arguments.
    seen() {
      local needle="$1"
      shift
      local have
      for have in "$@"; do
        if [ "$have" = "$needle" ]; then
          return 0
        fi
      done
      return 1
    }
    # Print, base64-encoded, the .path of every listing entry of the given type.
    find_each() {
      local type="$1"
      shift
      # The '?' forms keep jq quiet when the response is not an array of
      # objects (for example an error object).
      printf '%s\n' "$*" \
        | jq -r --arg want "$type" '.[]? | select(.type? == $want) | .path' \
        | while IFS= read -r line; do echo "$line" | b64; done \
        | grep .
    }
    # The work list holds base64 tokens only, the seed included, and every
    # token is decoded again before it is sent to the API.
    b64_trees=("$(echo "$root" | b64)")
    while [ "$i" -lt "${#b64_trees[@]}" ]; do
      tree_path="$(echo "${b64_trees[i]}" | base64 --decode)"
      got="$(_list_tree "$project" "$tree_path")"
      for b64_tree in $(find_each "tree" "$got"); do
        if ! seen "$b64_tree" "${b64_trees[@]}"; then
          b64_trees+=("$b64_tree")
        fi
      done
      for b64_file in $(find_each "blob" "$got"); do
        if ! seen "$b64_file" "${b64_files[@]}"; then
          b64_files+=("$b64_file")
        fi
      done
      i=$((i+1))
    done
    for b64_file in "${b64_files[@]}"; do
      echo "$b64_file" | base64 --decode
    done
  }
  # Fetch the recursive tree listing of PROJECT ($1) below PATH ($2).
  # NOTE(review): only a single response is read; if the endpoint paginates,
  # later pages are not fetched - confirm against the API documentation.
  _list_tree() {
    local project
    project="$(urlencode "$1")"
    local path="api/v4/projects/$project/repository/tree"
    # The directory is user-controlled text, so it is encoded like the project.
    local query
    query="recursive=true&path=$(urlencode "$2")"
    _gcurl "https://gitlab-app.eng.qops.net/$path?$query"
  }
  "$@"
)
# urlencode STRING
#
# Print STRING percent-encoded, without a trailing newline. The characters
# A-Z a-z 0-9 . ~ _ - pass through unchanged; everything else becomes %XX.
urlencode() (
  # The body is a subshell, so this override never reaches the caller.
  # LC_ALL rather than LC_COLLATE alone: it takes precedence over an LC_ALL
  # inherited from the environment, and it makes bash measure and index the
  # string byte by byte, so a multibyte character is emitted as its
  # individual bytes instead of as one code point.
  LC_ALL=C
  local length="${#1}"
  local i c
  for ((i = 0; i < length; i++)); do
    c="${1:i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      # A leading quote makes printf use the numeric value of the character.
      *) printf '%%%02X' "'$c" ;;
    esac
  done
)
if [ "$0" == "$BASH_SOURCE" ]; then if [ "$0" == "$BASH_SOURCE" ]; then
main "$@" main "$@"
fi fi

29
app/crawler/notea.sh Normal file
View File

@ -0,0 +1,29 @@
#! /bin/bash
# notea SUBCOMMAND [ARGS...]
#
# Thin client for the HTTP API served at "$NOTEA_ADDR". The body is a
# subshell, so the helpers below are reachable only through the dispatcher on
# the last line, e.g. `notea ids` or `notea get ID`.
#
# Globals (read): NOTEA_ADDR
# Uses `cache`, which is defined outside this function.
notea() (
  # curl with the flags shared by every request.
  ncurl() {
    curl -sS "$@"
  }
  # Print the id of every entry under .items of the /api/tree response,
  # except the one that is exactly "root".
  ids() {
    # The jq program stays on one line: inside single quotes a backslash at
    # the end of a line is handed to jq literally instead of continuing it.
    ncurl "$NOTEA_ADDR/api/tree" \
      | jq -r '.items | to_entries[].value.id' \
      | grep -v '^root$'
  }
  # Print the note with id $1, served from the cache when possible.
  get() {
    local cache_key="notea cache $1"
    if cache get "$cache_key"; then
      return 0
    fi
    _get "$@" | cache put "$cache_key"
  }
  # Fetch the note with id $1 straight from the server.
  _get() {
    # Quoted so the URL always reaches curl as a single argument.
    ncurl "$NOTEA_ADDR/api/notes/$1"
  }
  "$@"
)