relative links go back to original
parent
52818b8c24
commit
935e806de2
|
|
@ -5,13 +5,17 @@ gitlab() (
|
||||||
echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wikis/'
|
echo "$*" | grep -q gitlab.app && ! echo "$*" | grep -q '/wikis/'
|
||||||
}
|
}
|
||||||
|
|
||||||
get() {
|
human_url() {
|
||||||
local url="$1"
|
_url "$@" | sed 's/api.v4.projects.//' | sed 's/%2F/\//g' | sed 's/.raw$//' | sed 's/repository\/files/-\/tree\/master/'
|
||||||
|
}
|
||||||
|
|
||||||
|
_url() {
|
||||||
|
local base_url="$1"
|
||||||
local blob="$(echo "$2" | base64 --decode)"
|
local blob="$(echo "$2" | base64 --decode)"
|
||||||
|
|
||||||
local project="$(_url_to_project_root "$url" | head -n 1)"
|
local project="$(_url_to_project_root "$base_url" | head -n 1)"
|
||||||
project="$(urlencode "$project")"
|
project="$(urlencode "$project")"
|
||||||
local root="$(_url_to_project_root "$url" | tail -n 1)"
|
local root="$(_url_to_project_root "$base_url" | tail -n 1)"
|
||||||
if [ -n "$root" ]; then
|
if [ -n "$root" ]; then
|
||||||
blob="${root%/}/${blob#/}"
|
blob="${root%/}/${blob#/}"
|
||||||
blob="${blob#/}"
|
blob="${blob#/}"
|
||||||
|
|
@ -20,8 +24,12 @@ gitlab() (
|
||||||
blob="$(urlencode "$blob")"
|
blob="$(urlencode "$blob")"
|
||||||
|
|
||||||
local path="api/v4/projects/$project/repository/files/$blob/raw"
|
local path="api/v4/projects/$project/repository/files/$blob/raw"
|
||||||
log _gcurl "https://gitlab-app.eng.qops.net/$path (blob=$blob, project=$project)"
|
log "url: https://gitlab-app.eng.qops.net/$path (blob=$blob, project=$project)"
|
||||||
_gcurl "https://gitlab-app.eng.qops.net/$path"
|
echo "https://gitlab-app.eng.qops.net/$path"
|
||||||
|
}
|
||||||
|
|
||||||
|
get() {
|
||||||
|
_gcurl "$(_url "$@")"
|
||||||
}
|
}
|
||||||
|
|
||||||
expand() {
|
expand() {
|
||||||
|
|
|
||||||
|
|
@ -118,10 +118,14 @@ crawl_with() {
|
||||||
full_title="${full_title%/}"
|
full_title="${full_title%/}"
|
||||||
full_title="${full_title#/}"
|
full_title="${full_title#/}"
|
||||||
export TITLE="${full_title##*/}"
|
export TITLE="${full_title##*/}"
|
||||||
|
local human_url="$($backend human_url "$crawlable_source" "$i")"
|
||||||
export CONTENT="$(
|
export CONTENT="$(
|
||||||
echo "**!! WARNING !! This page is autogenerated and prone to destruction and replacement**"
|
echo "**!! WARNING !! This page is autogenerated and prone to destruction and replacement**"
|
||||||
$backend get "$crawlable_source" "$i"
|
echo "**[See the original]($human_url)**"
|
||||||
|
$backend get "$crawlable_source" "$i" \
|
||||||
|
| sed 's/](\([^#h]\)/]\(%%%\1/g'
|
||||||
)"
|
)"
|
||||||
|
export CONTENT="${CONTENT//"%%%"/"${human_url%/*}/"}"
|
||||||
export ID="$(
|
export ID="$(
|
||||||
local sum="$pid/"
|
local sum="$pid/"
|
||||||
local title_so_far=""
|
local title_so_far=""
|
||||||
|
|
@ -139,7 +143,7 @@ crawl_with() {
|
||||||
)"
|
)"
|
||||||
ID="${ID%/}"
|
ID="${ID%/}"
|
||||||
log " $ID ($TITLE): ${#CONTENT}"
|
log " $ID ($TITLE): ${#CONTENT}"
|
||||||
push_crawled "$PID/$ID" "$TITLE" "$CONTENT"
|
push_crawled "$ID" "$TITLE" "$CONTENT"
|
||||||
}
|
}
|
||||||
if [ "${#expanded[@]}" -gt 0 ]; then
|
if [ "${#expanded[@]}" -gt 0 ]; then
|
||||||
for i in $(seq 0 $(("${#expanded[@]}"-1))); do
|
for i in $(seq 0 $(("${#expanded[@]}"-1))); do
|
||||||
|
|
@ -162,8 +166,8 @@ is_crawlable() {
|
||||||
}
|
}
|
||||||
|
|
||||||
rewrite() {
|
rewrite() {
|
||||||
log not impl: rewrite "#abc-def" to "#h-abc-def"
|
log not impl: rewrite "./asdf" to "absolute.com/asdf"
|
||||||
log not impl: rewrite "./asdf" to "./zyxw" or "absolute.com/asdf"
|
log not impl: rewrite "#abc-def?f=abc" to "#h-abc-def?f=abc" or better dont depend on query params so much
|
||||||
log not impl rewrite, change images
|
log not impl rewrite, change images
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,19 @@
|
||||||
todo:
|
todo:
|
||||||
- link to original in generated/scraped
|
- only 1 pid link in tree as title
|
||||||
- rewrite links if available to local
|
|
||||||
- rewrite anchors (maybe gitlab already works :^))
|
|
||||||
- mark generated via meta so other files in the dir can be created, deleted, replaced safely
|
- mark generated via meta so other files in the dir can be created, deleted, replaced safely
|
||||||
- ui; last updated; 2022.02.01T12:34:56
|
- ui; last updated; 2022.02.01T12:34:56
|
||||||
- fix links
|
|
||||||
- put images @server
|
- put images @server
|
||||||
- fix images
|
- fix images
|
||||||
- scrape odo
|
- scrape odo
|
||||||
- scrape gdoc
|
- scrape gdoc
|
||||||
- scrape gsheet
|
- scrape gsheet
|
||||||
|
- scrape gslide
|
||||||
|
- anchor links work
|
||||||
|
- rewrite links if available to local
|
||||||
done:
|
done:
|
||||||
|
- fix links
|
||||||
|
- rewrite anchors (maybe gitlab already works :^))
|
||||||
|
- link to original in generated/scraped
|
||||||
- buttons to invis
|
- buttons to invis
|
||||||
- damned width css
|
- damned width css
|
||||||
- css
|
- css
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue