parent
9b05003329
commit
780d50bb59
|
|
@ -78,10 +78,14 @@ crawl_with() {
|
||||||
export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
|
export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
|
||||||
export PID="$(echo $json | jq -r .id)"
|
export PID="$(echo $json | jq -r .id)"
|
||||||
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
||||||
notea put
|
push_crawled
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
push_crawled() {
|
||||||
|
notea put
|
||||||
|
}
|
||||||
|
|
||||||
is_crawlable() {
|
is_crawlable() {
|
||||||
local crawlable_source="$(extract_crawlable_source "$*")"
|
local crawlable_source="$(extract_crawlable_source "$*")"
|
||||||
# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
|
# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
|
||||||
|
|
@ -90,6 +94,8 @@ is_crawlable() {
|
||||||
}
|
}
|
||||||
|
|
||||||
rewrite() {
|
rewrite() {
|
||||||
|
log not impl: rewrite "#abc-def" to "#h-abc-def"
|
||||||
|
log not impl: rewrite "./asdf" to "./zyxw" or "absolute.com/asdf"
|
||||||
log not impl rewrite, change images
|
log not impl rewrite, change images
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue