get gitlab scrape scripts from hackathon

master
Bel LaPointe 2022-01-31 15:27:18 -07:00
parent e4f9ecde4d
commit 42605c706b
3 changed files with 131 additions and 0 deletions

3
spike/crawl/env.env Normal file
View File

@ -0,0 +1,3 @@
# Credentials read by the crawl scripts. Fill these in locally and keep the
# populated file out of version control.
# NOTE(review): non-empty token values were previously committed in this file;
# treat them as leaked and rotate them.
# An empty GITLAB_PAT makes the crawl scripts fall back to their
# `security find-generic-password` lookup.
GITLAB_PAT=
ODO_TOKEN=
GDOC_TOKEN=

View File

@ -0,0 +1,64 @@
#! /bin/bash
# urlencode <string>
# Percent-encode <string> for use in a URL: ASCII letters, digits and the
# characters ".~_-" pass through unchanged; every other character is written
# as %XX (uppercase hex).
# Arguments: $1 - string to encode
# Outputs:   the encoded string on stdout, without a trailing newline
# NOTE(review): a non-ASCII character is encoded from the single numeric value
# printf reports for it, which may not be its UTF-8 byte sequence — confirm
# before passing non-ASCII input.
urlencode() {
  # Local, so the caller's LC_COLLATE (set or unset) is back in place on
  # return; C collation is selected while the [a-z]-style ranges are matched.
  local LC_COLLATE=C
  local length="${#1}"
  # Local loop state: the surrounding script keeps its own global counter "i".
  local i c
  for (( i = 0; i < length; i++ )); do
    c="${1:$i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      # A leading quote makes printf use the character's numeric code.
      *) printf '%%%02X' "'$c" ;;
    esac
  done
}
# Access token: GITLAB_PAT from the environment, otherwise the output of the
# `security find-generic-password` lookup for the current user (its error
# output is discarded, so a failed lookup leaves the token empty).
pat="${GITLAB_PAT:-"$(security find-generic-password -a "${USER}" -s GITLAB_PAT -w 2> /dev/null )"}"
# URL-encoded project path. The expansion is quoted so that a PROJECT value
# containing whitespace or glob characters reaches urlencode as one argument.
project="$(urlencode "${PROJECT:-data-store/orchestration/runbooks}")"
# Repository-tree endpoint path for that project.
path="/api/v4/projects/$project/repository/tree"
# Request the repository tree below GITLAB_PATH (no path filter when it is
# unset or empty) and write the JSON response to stdout.
# Globals:   GITLAB_PATH, path, pat (read); urlencode (called)
# Outputs:   the response body on stdout; curl errors on stderr
# Returns:   curl's exit status
# NOTE(review): only one page of up to 100 entries is requested; if GitLab
# paginates this endpoint, larger trees are still truncated — confirm and add
# paging if needed.
list() {
  local encoded_path query
  # Encode the path so characters such as spaces, "&" or "#" cannot break or
  # extend the query string.
  encoded_path="$(urlencode "${GITLAB_PATH:-}")"
  query="recursive=true&per_page=100&path=$encoded_path"
  # ${path#/} drops a leading slash so the URL has a single "/" after the host.
  curl -sS \
    -H "Authorization: Bearer $pat" \
    "https://gitlab-app.eng.qops.net/${path#/}?$query"
}
# Request the project's wiki page list (with_content=0) and write the JSON
# response to stdout.
# Globals: pat, project (read)
# Returns: curl's exit status
list_wiki() {
  local endpoint="https://gitlab-app.eng.qops.net/api/v4/projects/$project/wikis?with_content=0"
  local auth_header="Authorization: Bearer $pat"
  curl -sS -H "$auth_header" "$endpoint"
}
# Walk the repository tree starting at GITLAB_PATH (default "/"), collecting
# directory paths in `trees` and file paths in `files`, then print every file
# as gitlab://<path> (sorted) followed by every wiki page as wiki://<title>.
files=()
trees=("${GITLAB_PATH:-/}")
i=0
while [ "$i" -lt "${#trees[@]}" ]; do
  gitlab_path="${trees[i]}"
  # Progress goes to stderr so stdout carries only the final listing.
  echo "gitlab_path=$gitlab_path, i=$i, trees=${#trees[@]}, files=${#files[@]}..." >&2
  got="$(GITLAB_PATH=$gitlab_path list)"

  # Queue every directory not seen before. Paths are read one per line, so
  # spaces in a path need no extra encoding step. Membership is tested with
  # grep -Fx (literal, whole line) so regex characters in a path and the
  # first/last array positions are all handled.
  while IFS= read -r nested_path; do
    [ -n "$nested_path" ] || continue
    if printf '%s\n' "${trees[@]}" | grep -Fxq -- "$nested_path"; then
      continue
    fi
    trees+=("$nested_path")
  done < <(printf '%s\n' "$got" | jq -r '.[]? | objects | select(.type == "tree") | .path')

  # Record every file not seen before, using the same membership test.
  while IFS= read -r nested_path; do
    [ -n "$nested_path" ] || continue
    if printf '%s\n' "${files[@]}" | grep -Fxq -- "$nested_path"; then
      continue
    fi
    files+=("$nested_path")
  done < <(printf '%s\n' "$got" | jq -r '.[]? | objects | select(.type == "blob") | .path')

  i=$((i+1))
  # Stop after six listings.
  # NOTE(review): looks like a cap on the number of requests — confirm it is
  # still wanted.
  if ((i>5)); then
    break
  fi
done
for file in "${files[@]}"; do
  printf 'gitlab://%s\n' "$file"
done | sort
list_wiki | jq -r '.[] | .title' | sed 's/^/wiki:\/\//'

View File

@ -0,0 +1,64 @@
#! /bin/bash
# urlencode <string>
# Percent-encode <string> for use in a URL: ASCII letters, digits and the
# characters ".~_-" pass through unchanged; every other character is written
# as %XX (uppercase hex).
# Arguments: $1 - string to encode
# Outputs:   the encoded string on stdout, without a trailing newline
# NOTE(review): a non-ASCII character is encoded from the single numeric value
# printf reports for it, which may not be its UTF-8 byte sequence — confirm
# before passing non-ASCII input.
urlencode() {
  # Local, so the caller's LC_COLLATE (set or unset) is back in place on
  # return; C collation is selected while the [a-z]-style ranges are matched.
  local LC_COLLATE=C
  local length="${#1}"
  # Local loop state: the surrounding script keeps its own global counter "i".
  local i c
  for (( i = 0; i < length; i++ )); do
    c="${1:$i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      # A leading quote makes printf use the character's numeric code.
      *) printf '%%%02X' "'$c" ;;
    esac
  done
}
# Access token: GITLAB_PAT from the environment, otherwise the output of the
# `security find-generic-password` lookup for the current user (its error
# output is discarded, so a failed lookup leaves the token empty).
pat="${GITLAB_PAT:-"$(security find-generic-password -a "${USER}" -s GITLAB_PAT -w 2> /dev/null )"}"
# URL-encoded project path. The expansion is quoted so that a PROJECT value
# containing whitespace or glob characters reaches urlencode as one argument.
project="$(urlencode "${PROJECT:-data-store/orchestration/runbooks}")"
# Repository-tree endpoint path for that project.
path="/api/v4/projects/$project/repository/tree"
# Request the repository tree below GITLAB_PATH (no path filter when it is
# unset or empty) and write the JSON response to stdout.
# Globals:   GITLAB_PATH, path, pat (read); urlencode (called)
# Outputs:   the response body on stdout; curl errors on stderr
# Returns:   curl's exit status
# NOTE(review): only one page of up to 100 entries is requested; if GitLab
# paginates this endpoint, larger trees are still truncated — confirm and add
# paging if needed.
list() {
  local encoded_path query
  # Encode the path so characters such as spaces, "&" or "#" cannot break or
  # extend the query string.
  encoded_path="$(urlencode "${GITLAB_PATH:-}")"
  query="recursive=true&per_page=100&path=$encoded_path"
  # ${path#/} drops a leading slash so the URL has a single "/" after the host.
  curl -sS \
    -H "Authorization: Bearer $pat" \
    "https://gitlab-app.eng.qops.net/${path#/}?$query"
}
# Request the project's wiki page list (with_content=0) and write the JSON
# response to stdout.
# Globals: pat, project (read)
# Returns: curl's exit status
list_wiki() {
  local endpoint="https://gitlab-app.eng.qops.net/api/v4/projects/$project/wikis?with_content=0"
  local auth_header="Authorization: Bearer $pat"
  curl -sS -H "$auth_header" "$endpoint"
}
# Walk the repository tree starting at GITLAB_PATH (default "/"), collecting
# directory paths in `trees` and file paths in `files`, then print every file
# as gitlab://<path> (sorted) followed by every wiki page as wiki://<title>.
files=()
trees=("${GITLAB_PATH:-/}")
i=0
while [ "$i" -lt "${#trees[@]}" ]; do
  gitlab_path="${trees[i]}"
  # Progress goes to stderr so stdout carries only the final listing.
  echo "gitlab_path=$gitlab_path, i=$i, trees=${#trees[@]}, files=${#files[@]}..." >&2
  got="$(GITLAB_PATH=$gitlab_path list)"

  # Queue every directory not seen before. Paths are read one per line, so
  # spaces in a path need no extra encoding step. Membership is tested with
  # grep -Fx (literal, whole line) so regex characters in a path and the
  # first/last array positions are all handled.
  while IFS= read -r nested_path; do
    [ -n "$nested_path" ] || continue
    if printf '%s\n' "${trees[@]}" | grep -Fxq -- "$nested_path"; then
      continue
    fi
    trees+=("$nested_path")
  done < <(printf '%s\n' "$got" | jq -r '.[]? | objects | select(.type == "tree") | .path')

  # Record every file not seen before, using the same membership test.
  while IFS= read -r nested_path; do
    [ -n "$nested_path" ] || continue
    if printf '%s\n' "${files[@]}" | grep -Fxq -- "$nested_path"; then
      continue
    fi
    files+=("$nested_path")
  done < <(printf '%s\n' "$got" | jq -r '.[]? | objects | select(.type == "blob") | .path')

  i=$((i+1))
  # Stop after six listings.
  # NOTE(review): looks like a cap on the number of requests — confirm it is
  # still wanted.
  if ((i>5)); then
    break
  fi
done
for file in "${files[@]}"; do
  printf 'gitlab://%s\n' "$file"
done | sort
list_wiki | jq -r '.[] | .title' | sed 's/^/wiki:\/\//'