parent
9b05003329
commit
780d50bb59
|
|
@ -78,10 +78,14 @@ crawl_with() {
|
|||
export ID="$(echo "$crawlable_source/$TITLE" | base64 | md5sum | awk '{print $1}')"
|
||||
export PID="$(echo $json | jq -r .id)"
|
||||
log " $PID/$ID ($TITLE): ${#CONTENT}"
|
||||
notea put
|
||||
push_crawled
|
||||
done
|
||||
}
|
||||
|
||||
#######################################
# Push the entry that was just crawled by delegating to `notea put`.
# Globals:
#   None read directly here. NOTE(review): `notea put` presumably consumes
#   the values the caller exports beforehand (e.g. ID, PID) - confirm
#   against the definition of `notea`.
# Arguments:
#   None.
# Returns:
#   The exit status of `notea put`.
#######################################
push_crawled() {
  notea put
}
|
||||
|
||||
is_crawlable() {
|
||||
local crawlable_source="$(extract_crawlable_source "$*")"
|
||||
# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
|
||||
|
|
@ -90,6 +94,8 @@ is_crawlable() {
|
|||
}
|
||||
|
||||
#######################################
# Stub: rewriting is not implemented yet.
# Emits three `log` messages naming the rewrites still to be written
# (fragment references such as "#abc-def", relative paths such as "./asdf",
# and images), then reports failure.
# Arguments:
#   None used.
# Returns:
#   1, always.
#######################################
rewrite() {
  # Each message is passed to `log` as separate words, exactly as listed;
  # only the values containing '#', '.' or '/' are quoted.
  log not impl: rewrite '#abc-def' to '#h-abc-def'
  log not impl: rewrite './asdf' to './zyxw' or 'absolute.com/asdf'
  log not impl rewrite, change images

  # Non-zero status tells callers that no rewrite took place.
  return 1
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue