reorg repo
28
crawler/cache.sh
Normal file
@@ -0,0 +1,28 @@
#! /bin/bash

cache() (
  # Map arbitrary arguments to a stable file path under $CACHE.
  path() {
    echo "$CACHE/$(echo "$*" | base64 | md5sum | awk '{print $1}')"
  }
  get() {
    local path="$(path "$*")"
    if ! [ -f "$path" ]; then
      return 1
    fi
    # An empty cache file is a miss. Read via stdin so wc prints only the
    # byte count; `wc -c "$path"` appends the filename, so the original
    # '^[ ]*0[ ]*$' match could never succeed.
    if wc -c < "$path" | grep -q '^[ ]*0[ ]*$'; then
      return 1
    fi
    local created="$(date -r "$path" +%s)"
    local now="$(date +%s)"
    if ((now-created > CACHE_DURATION)); then
      return 1
    fi
    cat "$path"
  }
  put() {
    local path="$(path "$*")"
    tee "$path"
  }
  "$@"
)
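Usage sketch (illustration, not part of the commit), assuming CACHE and CACHE_DURATION are exported the way main.sh does:

  export CACHE="$(mktemp -d)" CACHE_DURATION=300
  cache get "some key" || echo miss        # nothing cached yet
  echo value | cache put "some key" >/dev/null
  cache get "some key"                     # prints: value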
12
crawler/cache_test.sh
Normal file
@@ -0,0 +1,12 @@
#! /bin/bash

test_path() {
  cache path abc | tr '/' '\n' | tail -n 1 | grep -q .
}

test_get_put_get() {
  export CACHE_DURATION=10
  ! cache get abc
  echo hi | cache put abc > /dev/null
  cache get abc | grep -q .
}
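For reference (illustration, not in the commit): keys hash to fixed-width file names under $CACHE, which is why test_path only needs to check that the last path segment is non-empty:

  $ CACHE=/tmp/c cache path "some key"
  /tmp/c/<32-hex-md5>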
158
crawler/gitlab.sh
Normal file
@@ -0,0 +1,158 @@
#! /bin/bash

gitlab() (
  _is_gitlab() {
    echo "$*" | grep -q gitlab.app
  }

  _is_wiki() {
    echo "$*" | grep -q '/wikis'
  }

  is() {
    _is_gitlab "$@" && ! _is_wiki "$@"
  }

  human_url() {
    _url "$@" | sed 's/api.v4.projects.//' | sed 's/%2F/\//g' | sed 's/.raw$//' | sed 's/repository\/files/-\/tree\/master/'
  }

  _url() {
    local base_url="$1"
    local blob="$(echo "$2" | base64 --decode)"

    local project="$(_url_to_project_root "$base_url" | head -n 1)"
    project="$(urlencode "$project")"
    local root="$(_url_to_project_root "$base_url" | tail -n 1)"
    if [ -n "$root" ]; then
      blob="${root%/}/${blob#/}"
      blob="${blob#/}"
      blob="${blob%/}"
    fi
    blob="$(urlencode "$blob")"

    local path="api/v4/projects/$project/repository/files/$blob/raw"
    log "url: https://gitlab-app.eng.qops.net/$path (blob=$blob, project=$project)"
    echo "https://gitlab-app.eng.qops.net/$path"
  }

  get() {
    _gcurl "$(_url "$@")"
  }

  expand() {
    local cache_key="gitlab expand $*"
    if cache get "$cache_key"; then
      return 0
    fi
    _expand "$@" | sort | cache put "$cache_key"
  }

  _expand() {
    local url="$1"
    local project="$(_url_to_project_root "$url" | head -n 1)"
    local root="$(_url_to_project_root "$url" | tail -n 1)"
    __expand "$project" "$root"
  }

  _url_to_project_root() {
    local url="$1"
    local url_path="${url#http*://gitlab*.net/}"
    local project=""
    if [[ "$url_path" == *"/-/"* ]]; then
      project="${url_path%%/-/*}"
    elif [[ "$url_path" == *"/tree/"* ]]; then
      project="${url_path%%/tree/*}"
    else
      project="$url_path"
    fi
    local root="${url_path#*"$project"}"
    root="${root#*/-/}"
    root="${root#/}"
    root="${root#blob/}"
    root="${root#tree/}"
    # Drop the ref segment (e.g. "master") that now leads the path.
    root="$(echo "$root" | sed 's/^[^\/]*//')"
    root="${root#/}"
    log "project=$project, root=$root, url=$url"
    echo "$project"
    echo "$root"
  }

  __expand() {
    local project="$1"
    local root="${2:-"/"}"

    local b64_files=()
    local b64_trees=("$(echo "$root" | base64)")
    local i=0

    find_each() {
      local type="$1"
      shift
      echo "$*" \
        | jq -c .[] \
        | grep "\"type\":\"$type\"" \
        | jq -r .path \
        | while read -r line; do echo "$line" | base64; done \
        | grep .
    }
    # Breadth-first walk: b64_trees doubles as work queue and visited set.
    while [ "$i" -lt "${#b64_trees[@]}" ]; do
      got="$(_list_tree "$project" "$(echo "${b64_trees[i]}" | base64 --decode)")"
      for b64_tree in $(find_each "tree" "$got"); do
        # Pad with spaces so a fixed-string match works at both ends; the
        # original "[ ^]...[ $]" regex missed the first and last elements
        # ("^" and "$" are literal inside brackets).
        if ! echo " ${b64_trees[*]} " | grep -qF " $b64_tree "; then
          b64_trees+=("$b64_tree")
        fi
      done
      for b64_file in $(find_each "blob" "$got"); do
        if ! echo " ${b64_files[*]} " | grep -qF " $b64_file "; then
          b64_files+=("$b64_file")
        fi
      done
      i=$((i+1))
    done
    for b64_file in "${b64_files[@]}"; do
      local file="$(echo "$b64_file" | base64 --decode)"
      file="${file#"$root"}"
      file="${file#/}"
      case "${file##*.}" in
        md|txt )
          echo "$file" | base64
          ;;
      esac
    done
  }

  _list_tree() {
    local project="$(urlencode "$1")"
    local path="api/v4/projects/$project/repository/tree"
    local query="recursive=true&path=$2"
    _gcurl "https://gitlab-app.eng.qops.net/$path?$query"
  }

  _gcurl() {
    local cache_key="gitlab _gcurl $*"
    if cache get "$cache_key"; then
      return 0
    fi
    __gcurl "$@" | cache put "$cache_key"
  }

  __gcurl() {
    curl -sS -H "Authorization: Bearer $GITLAB_PAT" "$@"
  }

  "$@"
)

urlencode() (
  LC_COLLATE=C
  local length="${#1}"
  for (( i = 0; i < length; i++ )); do
    local c="${1:$i:1}"
    case $c in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      *) printf '%%%02X' "'$c" ;;
    esac
  done
)
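Worked example (derived from the fixtures in gitlab_test.sh, not part of the commit): for

  https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/blob/master/Alerts/rems/README.md

_url_to_project_root prints project=data-store/orchestration/runbooks and root=Alerts/rems/README.md, so _url (with an empty blob argument) builds

  https://gitlab-app.eng.qops.net/api/v4/projects/data-store%2Forchestration%2Frunbooks/repository/files/Alerts%2Frems%2FREADME.md/raw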
69
crawler/gitlab_test.sh
Normal file
@@ -0,0 +1,69 @@
#! /bin/bash

test___expand() {
  gitlab eval "$(cat <<EOF
    _gcurl() {
      case "\$1" in
        'https://gitlab-app.eng.qops.net/api/v4/projects/project/repository/tree?recursive=true&path=dir' )
          echo '[
            {"id": "a", "name": "dir2", "type": "tree", "path": "dir/dir2", "mode": "040000"},
            {"id": "b", "name": "blob.md", "type": "blob", "path": "dir/blob.md", "mode": "100644"}
          ]'
          ;;
        'https://gitlab-app.eng.qops.net/api/v4/projects/project/repository/tree?recursive=true&path=dir/dir2' )
          echo '[
            {"id": "c", "name": "blob2.txt", "type": "blob", "path": "dir/dir2/blob2.txt", "mode": "100644"},
            {"id": "c", "name": "blob3.jpg", "type": "blob", "path": "dir/dir2/blob3.jpg", "mode": "100644"}
          ]'
          ;;
        * )
          return 1
          ;;
      esac
    }
    local blob=false
    local dir2blob2=false
    local others=0
    for result in \$(__expand project dir); do
      if echo \$result | base64 --decode | grep -q ^blob.md$; then
        blob=true
      elif echo \$result | base64 --decode | grep -q ^dir2.blob2.txt$; then
        dir2blob2=true
      else
        others=\$((others+1))
      fi
    done
    if [ \$others != 0 ]; then
      return 101
    fi
    if ! \$blob; then
      return 102
    fi
    if ! \$dir2blob2; then
      return 103
    fi
EOF
  )"
}

test_url_to_project_root() {
  log() { true; }
  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/tree/master | grep -q '^data-store/orchestration/runbooks$'
  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/tree/master | tail -n 1 | grep -q '^$'

  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/blob/master/Alerts/rems/README.md | grep -q 'data-store/orchestration/runbooks'
  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/blob/master/Alerts/rems/README.md | grep -q 'Alerts/rems/README.md'

  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/tree/master/Alerts | grep -q 'data-store/orchestration/runbooks'
  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/tree/master/Alerts | grep -q 'Alerts'

  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks | grep -q 'data-store/orchestration/runbooks'
  gitlab _url_to_project_root https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks | grep -q '^$'
}

test_is() {
  gitlab is https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/blob/master/Alerts/rems/README.md
  gitlab is https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks/-/tree/master/Alerts
  gitlab is https://gitlab-app.eng.qops.net/data-store/orchestration/runbooks
  ! gitlab is https://gitlab-app.eng.qops.net/surveys/marauders-map/wikis/Customer-impact-of-an-outage
}
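Note on the pattern above (observation, not part of the commit): because gitlab() dispatches "$@" inside its own subshell, `gitlab eval "<script>"` runs the script in that subshell's scope. The test can therefore stub _gcurl and exercise __expand without network access, and without the stub leaking into the caller.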
86
crawler/gitlab_wiki.sh
Normal file
@@ -0,0 +1,86 @@
#! /bin/bash

gitlab_wiki() (
  is() {
    gitlab _is_gitlab "$@" && gitlab _is_wiki "$@"
  }

  human_url() {
    log "not impl: human url: $@"
    exit 1
  }

  _host() {
    local id="$1"
    local host="${id%%.net*}.net"
    echo "$host"
  }

  _project() {
    local id="$1"
    local host="$(_host "$@")"
    local path="${id#$host}"
    local project="${path%%/wikis*}"
    project="${project%/-}"
    project="${project%/-/}"
    project="${project#/}"
    project="${project%/}"
    echo "$project"
  }

  _blob() {
    local id="$1"
    local host="$(_host "$@")"
    local project="$(_project "$@")"
    local path="${id#$host}"
    local blob="${path#*/wikis}"
    blob="${blob#/}"
    blob="${blob%/}"
    echo "$blob"
  }

  get() {
    local base="$1"
    local host="$(_host "$base")"
    local project="$(_project "$base")"
    local blob="$(_blob "$base")"
    if [ "$(echo "$2" | base64 --decode)" != "" ]; then
      blob="$blob/$(echo "$2" | base64 --decode)"
    fi
    log "project=$project"
    log "$host/api/v4/projects/$(urlencode "$project")/wikis/$(urlencode "$blob")"
    gitlab \
      _gcurl \
      "$host/api/v4/projects/$(urlencode "$project")/wikis/$(urlencode "$blob")" \
      | jq -r .content
  }

  expand() {
    local cache_key="gitlab_wiki expand $*"
    if cache get "$cache_key"; then
      return 0
    fi
    _expand "$@" | sort | cache put "$cache_key"
  }

  _expand() {
    local host="$(_host "$1")"
    local project="$(_project "$1")"
    local blob="$(_blob "$1")"
    # ([ -n "$blob" ] already means blob != ""; the second test was redundant.)
    if [ -n "$blob" ]; then
      echo "" | base64
      return
    fi
    log "host=$host, project=$project, blob=$blob"
    gitlab \
      _gcurl \
      "$host/api/v4/projects/$(urlencode "$project")/wikis?with_content=0" \
      | jq -r .[].slug \
      | while read -r line; do
          echo "$line" | base64
        done
  }

  "$@"
)
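Worked example (using the wiki URL from gitlab_test.sh, not part of the commit): for
https://gitlab-app.eng.qops.net/surveys/marauders-map/wikis/Customer-impact-of-an-outage,
_host yields https://gitlab-app.eng.qops.net, _project yields surveys/marauders-map, and
_blob yields Customer-impact-of-an-outage, so get() fetches
$host/api/v4/projects/surveys%2Fmarauders-map/wikis/Customer-impact-of-an-outage.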
182
crawler/main.sh
Normal file
@@ -0,0 +1,182 @@
#! /bin/bash

main() {
  config
  log crawling ids...
  for id in $(crawlable_ids); do
    crawl "$id"
  done
  log rewriting ids...
  for id in $(ids); do
    rewrite "$id"
  done
}

config() {
  set -o pipefail
  set -e
  export CACHE="${CACHE:-"$(mktemp -d)"}"
  mkdir -p "$CACHE"
  export CACHE_DURATION=$((60*50))
  export NOTES_ADDR="${NOTES_ADDR:-"http://localhost:3004"}"
  export GITLAB_PAT="$GITLAB_PAT"
  source ./gitlab.sh
  source ./gitlab_wiki.sh
  source ./cache.sh
  source ./notes.sh
}

log() {
  echo "$(echo "$(date +%H:%M:%S)> $*" | tr '\n' ' ')" >&2
}

ids() {
  notes ids | sort
}

crawlable_ids() {
  local all_ids=($(ids))
  local crawlable_ids=()
  for id in "${all_ids[@]}"; do
    # Skip ids nested under an id already marked crawlable.
    if for crawlable_id in "${crawlable_ids[@]}"; do
      if [ "$id" != "${id#$crawlable_id/}" ]; then
        echo true
      fi
    done | grep -q true; then
      continue
    fi
    local content="$(notes get "$id")"
    if is_crawlable "$content"; then
      crawlable_ids+=("$id")
    fi
  done
  for crawlable_id in "${crawlable_ids[@]}"; do
    echo "$crawlable_id"
  done
}

crawl() {
  local cache_key="crawled $*"
  # TODO
  if false && cache get "$cache_key"; then
    return
  fi
  _crawl "$@" | cache put "$cache_key"
}

_crawl() {
  local id="$1"
  local content="$(notes get "$id")"
  local json="$(
    printf '{"content": %s, "id": "%s"}' \
      "$(echo "$content" | jq -Rs)" \
      "$id"
  )"
  local crawlable_source="$(extract_crawlable_source "$content")"
  for backend in gitlab gitlab_wiki; do
    if $backend is "$crawlable_source"; then
      crawl_with $backend "$json"
      return $?
    fi
  done
  log "unknown backend for $crawlable_source"
  return 1
}

extract_crawlable_source() {
  # Last word of the first line, stripped of <>, leading and trailing slashes.
  echo "$*" | head -n 1 | awk '{print $NF}' | sed 's/^<//' | sed 's/>$//' | sed 's/^\///' | sed 's/\/$//'
}

crawl_with() {
  local backend="$1"
  local json="$2"
  local pid="$(echo "$json" | jq -r .id)"

  local content="$(echo "$json" | jq -r .content)"
  local crawlable_source="$(extract_crawlable_source "$content")"

  local expanded=($($backend expand "$crawlable_source"))

  log purge $crawlable_source:
  for subid in $(notes ids | grep "^$pid/"); do
    notes del "$subid"
  done

  log expand $crawlable_source:"$expanded"
  notes_mkdir_p() {
    local id="$1"
    local subtitle="${2%/}"
    notes put "$id" "$subtitle" "autogenerated content"
  }
  one() {
    encode() {
      base64 | md5sum | cut -c 1-10 | awk '{print $1}' | tr -d '\n'
    }
    local i="$1"
    local full_title="$(
      echo "$i" | base64 --decode | grep . || echo "${crawlable_source##*/}"
    )"
    full_title="${full_title%/}"
    full_title="${full_title#/}"
    export TITLE="${full_title##*/}"
    local human_url="$($backend human_url "$crawlable_source" "$i")"
    export CONTENT="$(
      echo "**!! WARNING !! This page is autogenerated and prone to destruction and replacement**"
      echo "**[See the original]($human_url)**"
      $backend get "$crawlable_source" "$i" \
        | sed 's/](\([^#h]\)/]\(%%%\1/g'
    )"
    # Rewrite relative links to absolute ones rooted at the human URL.
    export CONTENT="${CONTENT//"%%%"/"${human_url%/*}/"}"
    # Strip images that point into the repo tree.
    export CONTENT="$(
      printf "%s\n" "$CONTENT" \
        | sed 's/!\[\([^]]*\)](\([^)]*\)\/-\/tree\/\([^)]*\))//g'
    )"
    export ID="$(
      local sum="$pid/"
      local title_so_far=""
      for subtitle in $(echo $full_title | tr '/' '\n' | while read -r subtitle; do echo "$subtitle" | base64; done); do
        local subtitle="$(echo "$subtitle" | base64 --decode)"
        if [ -n "$title_so_far" ]; then
          local mkdir_p_title="${title_so_far%/}"
          mkdir_p_title="${mkdir_p_title##*/}"
          notes_mkdir_p "${sum%/}" "${mkdir_p_title}" >&2
        fi
        sum+="$(echo "$subtitle" | encode)/"
        title_so_far+="$subtitle/"
      done
      echo "$sum"
    )"
    ID="${ID%/}"
    log " $ID ($TITLE): ${#CONTENT}"
    push_crawled "$ID" "$TITLE" "$CONTENT"
  }
  if [ "${#expanded[@]}" -gt 0 ]; then
    for i in $(seq 0 $(("${#expanded[@]}"-1))); do
      one "${expanded[i]}"
    done
  else
    one ""
  fi
}

push_crawled() {
  notes put "$@"
}

is_crawlable() {
  local crawlable_source="$(extract_crawlable_source "$*")"
  # https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
  local url_pattern="(http|https)://[a-zA-Z0-9./?=_%:-]*"
  echo "$crawlable_source" | cut -c 1-300 | grep -q -E "^[ ]*$url_pattern[ ]*$"
}

rewrite() {
  log not impl: rewrite "./asdf" to "absolute.com/asdf"
  log not impl: rewrite "#abc-def?f=abc" to "#h-abc-def?f=abc" or better dont depend on query params so much
  log not impl rewrite, change images
  return 1
}

if [ "$0" == "${BASH_SOURCE[0]}" ]; then
  main "$@"
fi
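Example of the resulting id scheme (illustration, not part of the commit): for a note <pid> whose source expands to dir/blob.md, one() emits ID=<pid>/<h(dir)>/<h(blob.md)>, where h() is the first 10 hex chars of md5 over the base64-encoded segment, and notes_mkdir_p creates the intermediate "dir" note. To run the crawler:

  cd crawler
  GITLAB_PAT=<token> NOTES_ADDR=http://localhost:3004 ./main.sh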
102
crawler/notes.sh
Normal file
@@ -0,0 +1,102 @@
#! /bin/bash

notes() (
  ids() {
    _recurse_ids "" "$(_tree)"
  }

  _tree() {
    __tree "$@"
  }

  __tree() {
    _nncurl $NOTES_ADDR/api/v0/tree
  }

  _nncurl() {
    curl -sS "$@"
  }

  _recurse_ids() {
    local prefix="$1"
    local json="$2"
    if echo "$json" | jq .Branches | grep -q ^null$; then
      return 0
    fi
    local b64lines="$(echo "$json" | jq -r '.Branches | keys[]' | while read -r line; do echo "$line" | base64; done)"
    if [ -z "$b64lines" ]; then
      return 0
    fi
    for line in $b64lines; do
      line="$(echo "$line" | base64 --decode)"
      local subfix="$(printf "%s/%s" "$prefix" "$line")"
      subfix="${subfix#/}"
      if ! _is_deleted "$subfix"; then
        echo "$subfix"
      fi
      _recurse_ids "$subfix" "$(echo "$json" | jq -c ".Branches[\"$line\"]")"
    done
  }

  meta() {
    local id="$1"
    local tree="$(_tree)"
    for subid in ${id//\// }; do
      tree="$(echo "$tree" | jq -c .Branches | jq -c ".[\"$subid\"]")"
    done
    echo "$tree" | jq .Leaf
  }

  _is_deleted() {
    local id="$1"
    # A note counts as deleted if it or any ancestor is marked Deleted.
    while [ -n "$id" ]; do
      if meta "$id" | jq .Deleted | grep -q true; then
        return 0
      fi
      if [ "$id" == "${id%/*}" ]; then
        return 1
      fi
      id="${id%/*}"
    done
    return 1
  }

  get() {
    _get "$@"
  }

  _get() {
    _nncurl $NOTES_ADDR/api/v0/files/$1
  }

  del() {
    local id="$1"
    _nncurl \
      -X DELETE \
      $NOTES_ADDR/api/v0/files/$id
  }

  put() {
    set -u
    local ret=0
    if ! _put "$@"; then
      ret=1
    fi
    set +u
    return $ret
  }

  _put() {
    local id="$1"
    local title="$2"
    local body="$3"
    # The body goes via -d; the original also piped it on stdin, which
    # curl ignores once -d is given.
    _nncurl \
      -X PUT \
      -H "Title: $title" \
      -d "$body" \
      $NOTES_ADDR/api/v0/files/$id
  }

  "$@"
)
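API sketch (inferred from the calls above; the notes server itself is not part of this commit):

  GET    $NOTES_ADDR/api/v0/tree        # JSON: {"Branches": {"<id>": {"Leaf": {...}, "Branches": {...}}}}
  GET    $NOTES_ADDR/api/v0/files/<id>  # raw note body
  PUT    $NOTES_ADDR/api/v0/files/<id>  # -H "Title: ..." -d "<body>"
  DELETE $NOTES_ADDR/api/v0/files/<id>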
66
crawler/notes_test.sh
Normal file
@@ -0,0 +1,66 @@
#! /bin/bash

test_ids() {
  local two_levels='{
    "Branches": {
      "id": {
        "Branches": {
          "subid": {
            "Branches": {}
          }
        }
      }
    }
  }'
  notes eval "$(cat <<EOF
    _tree() { echo '$two_levels'; true; }
    (ids; true) | grep '^id$' > /dev/null || return 101
    (ids; true) | grep '^id\/subid$' > /dev/null || return 102
    ids | wc -l | grep 2 > /dev/null || return 103
EOF
  )"
}

test_meta() {
  local two_levels='{
    "Branches": {
      "id": {
        "Leaf": {"Title": "top level"},
        "Branches": {
          "subid": {
            "Leaf": {"Title": "sub level"},
            "Branches": {}
          }
        }
      }
    }
  }'
  notes eval "$(cat <<EOF
    _tree() { echo '$two_levels'; }
    meta id | jq .Title | grep -q top.level || return 201
    meta id/subid | jq .Title | grep -q sub.level || return 202
EOF
  )"
}

test__is_deleted() {
  local two_levels='{
    "Branches": {
      "id": {
        "Leaf": {"Title": "top level", "Deleted": true},
        "Branches": {
          "subid": {
            "Leaf": {"Title": "sub level"},
            "Branches": {}
          }
        }
      }
    }
  }'
  notes eval "$(cat <<EOF
    _tree() { echo '$two_levels'; }
    _is_deleted id || return 301
    _is_deleted id/subid || return 302
EOF
  )"
}
52
crawler/test.sh
Normal file
@@ -0,0 +1,52 @@
#! /bin/bash

main() {
  local ret=0
  for f in ./*_test.sh; do
    if ! one_main "$f"; then
      echo failed $f >&2
      ret=$((ret+1))
    fi
  done
  if [ $ret != 0 ]; then
    echo failed >&2
  fi
  return $ret
}

one_main() (
  local f="$1"
  local ret=0
  for t in $(grep ^test_ "$f" | sed 's/(.*//'); do
    one_test "$f" "$t"
    local test_ret=$?
    if [ $test_ret != 0 ]; then
      echo failed $f:$t: $test_ret >&2
      ret=$((ret+1))
    fi
  done
  return $ret
)

one_test() (
  local f="$1"
  local t="$2"
  each
  source "${f%_test.sh}.sh"
  source "$f"
  eval "$t"
)

each() {
  export CACHE=$(mktemp -d)
  export GITLAB_PAT=gibberish
  export NOTES_ADDR=http://127.0.0.1:61111
  source ./cache.sh
  set -e
  set -o pipefail
  log() { echo "> $*" >&2; }
}

if [ "$0" == "${BASH_SOURCE[0]}" ]; then
  main "$@"
fi
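To run the suite (illustration, not part of the commit): each test gets a fresh temp CACHE, a dummy GITLAB_PAT, and a NOTES_ADDR pointing at an unused local port, so nothing real is touched:

  cd crawler
  ./test.sh   # prints "failed <file>:<test>: <code>" per failure, exits 0 when all pass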