#! /bin/bash
# --- Configuration (every knob is overridable via the environment) ---

# Local Hugo content directory that imported posts are written into.
HUGO_POSTS="${HUGO_POSTS:-"./posts"}"

# Name of the rclone remote (as defined in the rclone config) to pull from.
RCLONE_ALIAS="${RCLONE_ALIAS:-"hugo"}"

# Path to the rclone configuration file.
RCLONE_CONFIG="${RCLONE_CONFIG:-./rclone.conf}"

# Top-level folder on the remote that holds both posts and meta.
RCLONE_FOLDER="${RCLONE_FOLDER:-"shared_from_bbarl64"}"

# Sub-folder on the remote containing zipped post exports.
RCLONE_FOLDER_POSTS="${RCLONE_FOLDER_POSTS:-"posts"}"

# Sub-folder on the remote containing zipped meta/override exports.
RCLONE_FOLDER_META="${RCLONE_FOLDER_META:-"meta"}"

# Extra flags passed to every rclone invocation (intentionally word-split).
RCLONE_OPTS="${RCLONE_OPTS:-"--drive-shared-with-me"}"

# Scratch directory for downloaded archives and extraction work.
TMPDIR="${TMPDIR:-"/tmp"}"
# Entry point: configure shell behavior, then sync posts and meta
# from the rclone remote into the local Hugo tree. Order matters:
# flags must run first (set -e / cd), posts before meta.
main() {
  flags "$@"   # strict mode + cd to the script's own directory
  posts        # pull, extract and import zipped post archives
  meta         # pull, extract and merge meta/override archives
}
# Configure strict shell behavior and run from the script's own directory
# so relative paths (./posts, ./meta, ./rclone.conf) resolve consistently.
flags() {
  set -e           # abort on any unhandled command failure
  set -o pipefail  # a pipeline fails if any stage fails
  # Operate relative to the script location, not the caller's cwd.
  # ${BASH_SOURCE[0]} is the robust array form of $BASH_SOURCE.
  cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
}
# Sync zipped post exports from the remote into $HUGO_POSTS, then prune
# local bundles that no longer exist remotely. Runs in a subshell so the
# helper functions below stay private to this command.
posts() (

  # List remote post archives: one base64-encoded JSON object per line.
  list() {
    gd list_posts
  }

  # Succeed iff the post described by the JSON in $1 was already imported
  # from byte-identical metadata (nothing to do).
  up_to_date() {
    local meta_path
    meta_path="$(metadatad "$1")"
    if [ -f "$meta_path" ]; then
      if [ "$1" == "$(cat "$meta_path")" ]; then
        return 0
      fi
    fi
    return 1
  }

  # Path the archive described by JSON $1 is downloaded to.
  pulled() {
    echo "$TMPDIR/$(echo "$1" | jq -r .Name)"
  }

  # Download the archive described by JSON $1 into $TMPDIR.
  # (Fixed: read the argument, not the caller's loop variable $json.)
  pull() {
    local filename
    filename="$(echo "$1" | jq -r .Name)"
    gd pull_posts "$filename"
  }

  # Extraction directory for JSON $1:
  # $TMPDIR/<YYYY-MM-DD>_<sanitized archive basename>.
  extracted() {
    local archive mod_date safe_name
    archive="$(pulled "$1")"
    mod_date="$(echo "$1" | jq -r .ModTime)"
    mod_date="${mod_date%%T*}"   # keep only the date part of the ISO stamp
    safe_name="$(basename "$archive" | sed 's/.zip$//' | sed 's/[^a-zA-Z0-9]/_/g')"
    echo "$TMPDIR/${mod_date}_${safe_name}"
  }

  # Unpack the downloaded archive and render it into a Hugo post bundle.
  extract() {
    local extracted_dir pulled_file u_date_iso
    extracted_dir="$(extracted "$1")"
    pulled_file="$(pulled "$1")"
    u_date_iso="$(echo "$1" | jq -r .ModTime)"

    # Always start from a clean extraction directory.
    if [ -d "$extracted_dir" ]; then
      rm -rf "$extracted_dir"
    fi
    mkdir -p "$extracted_dir"
    7z x -o"$extracted_dir" "$pulled_file"

    # The post title is the archive name without its .zip suffix.
    extracted_zip_to_post "$(echo "$1" | jq -r .Name | sed 's/.zip$//')" "$extracted_dir" "$u_date_iso"
  }

  # Final location of the post bundle inside $HUGO_POSTS.
  imported() {
    local extracted_dir
    extracted_dir="$(extracted "$1")"
    echo "$HUGO_POSTS/$(basename "$extracted_dir")"
  }

  # Move the extracted bundle into place, replacing any previous import.
  import() {
    local extracted_dir target_dir
    extracted_dir="$(extracted "$1")"
    target_dir="$(imported "$1")"
    if [ -d "$target_dir" ]; then
      rm -rf "$target_dir"
    fi
    mv "$extracted_dir" "$target_dir"
  }

  # Path of the metadata marker file inside an imported bundle.
  metadatad() {
    local imported_dir
    imported_dir="$(imported "$1")"
    echo "$imported_dir"/.metadata.json
  }

  # Record the JSON used for this import so up_to_date can skip it next run.
  metadata() {
    local meta_path
    meta_path="$(metadatad "$1")"
    echo "$1" > "$meta_path"
  }

  # --- main loop: pull / extract / import every remote archive ---
  local want=()
  for b64_json in $(list); do
    local json filename
    json="$(echo "$b64_json" | base64 --decode)"
    filename="$(echo "$json" | jq -r .Name)"
    want+=("$(imported "$json")")
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pull"
    pull "$json"
    log "$filename: extract"
    extract "$json"
    log "$filename: import"
    import "$json"
    log "$filename: metadata"
    metadata "$json"
    rm "$(pulled "$json")"
  done

  # Prune local bundles that are no longer present on the remote.
  for d in "$HUGO_POSTS"/*; do
    if [[ ! " ${want[*]} " =~ " $d " ]]; then
      log "$d: stale, pruning"
      rm -rf "$d"
    fi
  done
)
# Sync zipped meta/override exports from the remote into ./meta.
# Runs in a subshell (now consistent with posts) so the nested helper
# functions do not leak into the global namespace after meta returns.
meta() (

  # List remote meta archives: one base64-encoded JSON object per line.
  list() {
    gd list_meta
  }

  # Download the archive described by JSON $1 into $TMPDIR.
  # (Fixed: read the argument, not the caller's loop variable $json.)
  pull() {
    local filename
    filename="$(echo "$1" | jq -r .Name)"
    gd pull_meta "$filename"
  }

  # Path the archive described by JSON $1 is downloaded to.
  pulled() {
    echo "$TMPDIR/$(echo "$1" | jq -r .Name)"
  }

  # State file remembering the JSON this entry was last imported from.
  meta_file() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name)/.meta"
  }

  # Succeed iff the entry was already imported from identical JSON and
  # its output file still exists.
  up_to_date() {
    test -f "$(meta_file "$1")" && test "$1" == "$(cat "$(meta_file "$1")")" && test -e "$(must "$1")"
  }

  # Extraction directory (archive name including the .zip suffix).
  extracted_dir() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name)"
  }

  # Extracted override file/dir (archive name without the .zip suffix).
  extracted() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name | sed 's/.zip$//')"
  }

  # Final output path under ./meta that import must produce.
  must() {
    echo "./meta/$(basename "$(extracted "$1")")"
  }

  # Unpack the archive; a single-file archive collapses to one override
  # file, a multi-file archive stays a directory. YAML overrides exported
  # as HTML are converted back to plain text.
  extract() {
    local extracted_dir
    extracted_dir="$(extracted_dir "$1")"
    rm -rf "$extracted_dir" "${extracted_dir%.zip}"
    mkdir -p "$extracted_dir"
    7z x -o"$extracted_dir" "$(pulled "$1")"
    local extracted_file="$(ls "$extracted_dir"/*.html)"
    if [ $(ls "$extracted_dir" | wc -l) == 1 ]; then
      cp "$extracted_file" "$(extracted "$1")"
      extracted_file="$(extracted "$1")"
    else
      cp -r "$extracted_dir" "$(extracted "$1")"
    fi
    local extracted="$(extracted "$1")"
    case "${extracted##*.}" in
      yaml ) true ;;
      * ) return ;;
    esac
    # The export wraps the YAML in HTML markup; strip it back out.
    cat "$extracted" | html_to_plaintext > "$extracted.2"; mv "$extracted.2" "$extracted"
  }

  # Merge an override into its default, dispatched on file extension.
  merge() {
    local extracted="$(extracted "$1")"
    case "${extracted##*.}" in
      yaml ) merge_yaml "$1" ;;
      * ) ;;
    esac
  }

  # Deep-merge the YAML override on top of its .default counterpart and
  # write the result to ./meta/<name>.
  merge_yaml() {
    local override="$(extracted "$1")"
    local default="$(echo "$override" | sed 's/\.overrides/\.default/')"
    if [ ! -f "$default" ]; then
      return   # no default to merge onto; import will fall back to a copy
    fi
    # Round-trip both files through JSON so jq can do the recursive merge.
    cat "$override" | yq -j eval - | jq > "$override.json"
    cat "$default" | yq -j eval - | jq > "$default.json"
    jq -s '.[0] * .[1]' "$default.json" "$override.json" | yq -P eval - > ./meta/"$(basename "$override")"
    rm "$override.json" "$default.json"
  }

  # Produce ./meta/<name>: via merge when a default exists, otherwise by
  # copying the override and rendering it as a post bundle.
  import() {
    local must
    must="$(must "$1")"
    if [ -e "$must" ]; then
      rm -rf "$must"
    fi
    log "import: merging $1"
    merge "$1"
    log "must=$must"
    if [ -e "$must" ]; then
      return   # merge already produced the output
    fi
    log "must not created by merge, continuing"
    cp -r "${must%/*}/.overrides/${must##*/}" "$must"
    local title u_date_iso
    title="$(echo "$1" | jq -r .Name | sed 's/.zip$//')"
    u_date_iso="$(echo "$1" | jq -r .ModTime)"
    log "extracting zip to post: title=$title, must=$must, date=$u_date_iso"
    extracted_zip_to_post "$title" "$must" "$u_date_iso"
  }

  # --- main loop: pull / extract / import every remote archive ---
  for b64_json in $(list); do
    local json filename
    # jq . pretty-prints; the stored .meta files use this exact format,
    # so keep it to avoid invalidating existing state.
    json="$(echo "$b64_json" | base64 --decode | jq .)"
    filename="$(echo "$json" | jq -r .Name)"
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pulling"
    pull "$json"
    log "$filename: extracting"
    extract "$json"
    log "$filename: importing"
    import "$json"
    echo "$json" > "$(meta_file "$json")"
  done
)
# Report an error via log, then terminate the whole script with status 2.
fatal() {
  log "$@"; exit 2
}
# Timestamped diagnostic line on stderr: "HH:MM:SS > message".
log() {
  printf '%s > %s\n' "$(date +%H:%M:%S)" "$*" >&2
}
# Google-Drive helper, dispatched by sub-command name: "gd <fn> [args...]".
# Runs in a subshell so the helpers below stay private.
gd() (

  # List .zip archives in the posts folder.
  list_posts() {
    gd list "$RCLONE_FOLDER_POSTS"
  }

  # List .zip archives in the meta folder.
  list_meta() {
    gd list "$RCLONE_FOLDER_META"
  }

  # Emit one base64-encoded JSON object per remote .zip file, one per line.
  # Select on .Name with jq rather than grepping the raw JSON (the old
  # grep 'zip"' also matched MimeType values like "application/zip").
  # tr strips base64's 76-column line wrapping (GNU base64 wraps by
  # default) so callers that word-split on whitespace get whole entries.
  list() {
    rc lsjson "$RCLONE_ALIAS:$RCLONE_FOLDER/$1" \
      | jq -c '.[] | select(.Name | endswith(".zip"))' \
      | while read -r line; do
          echo "$line" | base64 | tr -d '\n'
          echo
        done
  }

  pull_posts() {
    pull "$RCLONE_FOLDER_POSTS/$1"
  }

  pull_meta() {
    pull "$RCLONE_FOLDER_META/$1"
  }

  # Copy one remote file into $TMPDIR.
  pull() {
    rc copy "$RCLONE_ALIAS:$RCLONE_FOLDER/$1" "$TMPDIR"/
  }

  # rclone with the configured options; $RCLONE_OPTS is word-split on purpose.
  rc() {
    rclone \
      --config "$RCLONE_CONFIG" \
      --fast-list \
      $RCLONE_OPTS \
      "$@"
  }

  "$@"
)
# Crude HTML -> plain-text filter (stdin -> stdout):
#   1. drop single-line <style>...</style> blocks (greedy, per line)
#   2. normalize a space-like character to a plain space
#      (NOTE(review): this shows as 's/ / /g' — presumably it originally
#      replaced a non-breaking space; confirm against the upstream file)
#   3. turn every remaining tag into a newline
#   4. drop empty lines
html_to_plaintext() {
  sed 's/<style.*<\/style>//' | sed 's/ / /g' | sed -e 's/<[^>]*>/\n/g' | grep .
  #sed 's/<[^>]*>//g'
}
# Trim an HTML stream (stdin) down to just the <body>...</body> element:
# discard everything before the opening <body tag and after </body>.
# Both substitutions are line-local, so one sed pass suffices.
html_only_body() {
  sed -e 's/.*<body/<body/' -e 's/<\/body>.*/<\/body>/'
}
# Render an extracted archive directory into a Hugo post bundle:
# builds index.md with YAML front matter (title, date, tags scraped from
# "#hashtags" in the document text) followed by the HTML body, then
# dithers all images in place to shrink them.
#   $1 - post title (quotes are escaped for the YAML front matter)
#   $2 - directory containing the extracted *.html export
#   $3 - ISO-8601 modification timestamp used as the post date
extracted_zip_to_post() {
  local title="$1"
  local extracted_zip="$2"
  local u_date_iso="$3"

  local index_md="$extracted_zip/index.md"
  local index_html="$extracted_zip/.index.html"   # temporary body-only HTML

  # Keep only the <body>...</body> portion of the exported HTML.
  cat "$extracted_zip"/*.html | html_only_body > "$index_html"
  # Collect unique #hashtags; the second grep requires a letter so pure
  # numeric artifacts like "#123" are dropped. Word-splitting the command
  # substitution into the array is intentional here.
  local tags=($(cat "$index_html" | html_to_plaintext | grep -o '#[a-zA-Z0-9]*' | grep '[a-zA-Z]' | sed 's/^#//' | sort -u))
  # Join tags with ", "; 'first' is really a "seen one already" flag
  # (false on the first iteration, so no leading comma).
  local tags_csv="$(first=false; for tag in "${tags[@]}"; do true; if $first; then echo -n ", "; fi; first=true; echo -n "$tag"; done)"

  # Front matter: the sed strips any leading indentation and the grep
  # drops blank lines, so the literal can be indented/spaced freely.
  printf '
---
title: "%s"
date: %s
draft: false
tags: [%s]
---
' \
  "$(echo "$title" | sed 's/"/\\"/g')" \
  "$u_date_iso" \
  "$tags_csv" \
  | sed 's/^[ ]*//' \
  | grep . \
  > "$index_md"

  # Append the body, then remove the intermediate HTML files.
  cat "$index_html" >> "$index_md"
  echo "" >> "$index_md"
  rm -f "$index_html" "$extracted_zip"/*.html

  # Dither every image with an ordered 8x8 pattern (ImageMagick convert)
  # to cut file size; edits happen in place via a .2 temp file.
  for ext in png jpg jpeg gif JPG; do find "$extracted_zip" -name "*.$ext"; done | while read -r line; do
    convert "$line" -ordered-dither o8x8,8,8,4 "$line.2"
    mv "$line.2" "$line"
  done
}
# Run main only when executed directly; do nothing when sourced.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  main "$@"
fi