#! /bin/bash
#
# Sync zipped HTML exports from an rclone remote into a Hugo site.
#
#   posts: every *.zip under $RCLONE_FOLDER/$RCLONE_FOLDER_POSTS becomes a
#          page bundle in $HUGO_POSTS; bundles no longer on the remote are
#          pruned.
#   meta:  every *.zip under $RCLONE_FOLDER/$RCLONE_FOLDER_META is unpacked
#          into ./meta/.overrides and copied (or, for *.yaml, merged with
#          ./meta/.default) into ./meta.
#
# External tools used: rclone, jq, 7z, convert, yq.
# All settings below can be overridden from the environment.

HUGO_POSTS="${HUGO_POSTS:-"./posts"}"
RCLONE_ALIAS="${RCLONE_ALIAS:-"hugo"}"
RCLONE_CONFIG="${RCLONE_CONFIG:-./rclone.conf}"
RCLONE_FOLDER="${RCLONE_FOLDER:-"shared_from_bbarl64"}"
RCLONE_FOLDER_POSTS="${RCLONE_FOLDER_POSTS:-"posts"}"
RCLONE_FOLDER_META="${RCLONE_FOLDER_META:-"meta"}"
RCLONE_OPTS="${RCLONE_OPTS:-"--drive-shared-with-me"}"
TMPDIR="${TMPDIR:-"/tmp"}"

# Entry point: enable strict mode, then sync posts and meta.
main() {
  flags "$@"
  posts
  meta
}

# Enable errexit/pipefail and run from the script's own directory so the
# relative defaults (./posts, ./meta, ./rclone.conf) resolve next to it.
flags() {
  set -e
  set -o pipefail
  cd "$(dirname "${BASH_SOURCE[0]}")"
}

# Sync remote post archives into $HUGO_POSTS.
# Runs in a subshell so the helper functions defined here stay private.
# Every helper takes the JSON record of one remote file (as printed by
# `rclone lsjson`) as $1.
posts() (
  # Print one base64-encoded JSON record per remote post archive.
  list() {
    gd list_posts
  }

  # Succeed when the stored record of the imported post equals $1.
  up_to_date() {
    local stamp
    stamp="$(metadatad "$1")"
    [[ -f "$stamp" && "$1" == "$(cat "$stamp")" ]]
  }

  # Print the local path the archive is downloaded to.
  pulled() {
    local name
    name="$(jq -r .Name <<<"$1")"
    printf '%s\n' "$TMPDIR/$name"
  }

  # Download the archive into $TMPDIR.
  pull() {
    local name
    # Use the record passed in, not the caller's $json variable.
    name="$(jq -r .Name <<<"$1")"
    gd pull_posts "$name"
  }

  # Print the scratch directory the archive is unpacked into:
  # $TMPDIR/<date part of ModTime>_<sanitized archive name>.
  extracted() {
    local archive day slug
    archive="$(pulled "$1")"
    day="$(jq -r .ModTime <<<"$1")"
    day="${day%%T*}"
    slug="$(basename "$archive")"
    slug="${slug%.zip}"
    slug="$(printf '%s\n' "$slug" | sed 's/[^a-zA-Z0-9]/_/g')"
    printf '%s\n' "$TMPDIR/${day}_${slug}"
  }

  # Unpack the archive and turn it into a page bundle: index.md with front
  # matter followed by the HTML body, plus dithered images.
  extract() {
    local dest body_html index_md archive mod_time title tag image
    local tags_csv=""
    dest="$(extracted "$1")"
    body_html="$dest/.index.html"
    index_md="$dest/index.md"
    archive="$(pulled "$1")"
    mod_time="$(jq -r .ModTime <<<"$1")"

    rm -rf -- "${dest:?}"
    mkdir -p -- "$dest"
    7z x -o"$dest" "$archive"
    cat "$dest"/*.html | html_only_body > "$body_html"

    # Collect "#hashtag" words from the text as tags. A post without tags
    # (or without text) is fine, so the pipeline's status is deliberately
    # not checked: process substitution ignores it.
    while IFS= read -r tag; do
      tags_csv+="${tags_csv:+, }$tag"
    done < <(
      html_to_plaintext < "$body_html" \
        | grep -o '#[a-zA-Z0-9]*' \
        | grep '[a-zA-Z]' \
        | sed 's/^#//' \
        | sort -u
    )

    title="$(basename "$archive")"
    title="$(printf '%s\n' "${title%.zip}" | sed 's/"/\\"/g')"

    # Front matter delimiters must each sit on their own line, so the
    # newlines are spelled out in the format string.
    {
      printf -- '---\ntitle: "%s"\ndate: %s\ndraft: false\ntags: [%s]\n---\n' \
        "$title" "$mod_time" "$tags_csv"
      cat "$body_html"
      echo ""
    } > "$index_md"
    rm -- "$body_html" "$dest"/*.html

    # Dither every image in place.
    find "$dest" \
      \( -name '*.png' -o -name '*.jpg' -o -name '*.jpeg' \
      -o -name '*.gif' -o -name '*.JPG' \) -print0 \
      | while IFS= read -r -d '' image; do
          convert "$image" -ordered-dither o8x8,8,8,4 "$image.2"
          mv -- "$image.2" "$image"
        done
  }

  # Print the final location of the post inside $HUGO_POSTS.
  imported() {
    local unpacked
    unpacked="$(extracted "$1")"
    printf '%s\n' "$HUGO_POSTS/$(basename "$unpacked")"
  }

  # Move the unpacked bundle into $HUGO_POSTS, replacing an older copy.
  import() {
    local unpacked target
    unpacked="$(extracted "$1")"
    target="$(imported "$1")"
    mkdir -p -- "$HUGO_POSTS"
    rm -rf -- "${target:?}"
    mv -- "$unpacked" "$target"
  }

  # Print the path of the record stored alongside the imported post.
  metadatad() {
    local target
    target="$(imported "$1")"
    printf '%s\n' "$target/.metadata.json"
  }

  # Store the record so the next run can detect an unchanged post.
  metadata() {
    local stamp
    stamp="$(metadatad "$1")"
    printf '%s\n' "$1" > "$stamp"
  }

  local listing b64_json json filename d kept keep
  local -a want=()

  # List first and stop on failure: with an empty `want` the prune loop
  # below would otherwise delete every post after a transient rclone error.
  listing="$(list)" \
    || fatal "posts: could not list remote archives; refusing to prune"

  # Records are single base64 words, so plain word splitting is safe here
  # and keeps stdin free for the tools called in the loop body.
  # shellcheck disable=SC2086
  for b64_json in $listing; do
    json="$(base64 --decode <<<"$b64_json")"
    filename="$(jq -r .Name <<<"$json")"
    want+=("$(imported "$json")")
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pull"
    pull "$json"
    log "$filename: extract"
    extract "$json"
    log "$filename: import"
    import "$json"
    log "$filename: metadata"
    metadata "$json"
    rm -- "$(pulled "$json")"
  done

  # Remove bundles that are no longer present on the remote.
  for d in "$HUGO_POSTS"/*; do
    [[ -e "$d" ]] || continue  # unmatched glob: nothing to prune
    keep=false
    for kept in "${want[@]}"; do
      if [[ "$kept" == "$d" ]]; then
        keep=true
        break
      fi
    done
    if ! $keep; then
      log "$d: stale, pruning"
      rm -rf -- "$d"
    fi
  done
)

# Sync remote meta archives into ./meta.
# Runs in a subshell, like posts and gd, so its helpers stay private.
# Every helper takes the JSON record of one remote file as $1.
meta() (
  # Print one base64-encoded JSON record per remote meta archive.
  list() {
    gd list_meta
  }

  # Download the archive into $TMPDIR.
  pull() {
    local name
    # Use the record passed in, not the caller's $json variable.
    name="$(jq -r .Name <<<"$1")"
    gd pull_meta "$name"
  }

  # Print the local path the archive is downloaded to.
  pulled() {
    local name
    name="$(jq -r .Name <<<"$1")"
    printf '%s\n' "$TMPDIR/$name"
  }

  # Print the directory the archive is unpacked into (named after the zip).
  extracted_dir() {
    local name
    name="$(jq -r .Name <<<"$1")"
    printf '%s\n' "./meta/.overrides/$name"
  }

  # Print the path of the stored record used for change detection.
  meta_file() {
    local dir
    dir="$(extracted_dir "$1")"
    printf '%s\n' "$dir/.meta"
  }

  # Print the override path: the unpack directory without its .zip suffix.
  extracted() {
    local dir
    dir="$(extracted_dir "$1")"
    printf '%s\n' "${dir%.zip}"
  }

  # Print the path under ./meta that must exist once the import is done.
  must() {
    local override
    override="$(extracted "$1")"
    printf '%s\n' "./meta/$(basename "$override")"
  }

  # Succeed when the stored record equals $1 and the imported result exists.
  up_to_date() {
    local stamp
    stamp="$(meta_file "$1")"
    [[ -f "$stamp" && "$1" == "$(cat "$stamp")" && -e "$(must "$1")" ]]
  }

  # Unpack the archive. A single-file archive becomes one override file;
  # anything else becomes an override directory whose HTML is index.md.
  # Overrides named *.yaml are additionally reduced to plain text.
  extract() {
    local unpack_dir override archive html_file
    local -a html_files entries
    unpack_dir="$(extracted_dir "$1")"
    override="$(extracted "$1")"
    archive="$(pulled "$1")"

    rm -rf -- "${unpack_dir:?}" "${override:?}"
    mkdir -p -- "$unpack_dir"
    7z x -o"$unpack_dir" "$archive"

    html_files=("$unpack_dir"/*.html)
    [[ -e "${html_files[0]}" ]] \
      || fatal "$archive: archive contains no .html file"
    html_file="${html_files[0]}"

    entries=("$unpack_dir"/*)
    if (( ${#entries[@]} == 1 )); then
      cp -- "$html_file" "$override"
    else
      mv -- "$html_file" "$unpack_dir/index.md"
      mv -- "$unpack_dir" "$override"
    fi

    case "${override##*.}" in
      yaml) ;;
      *) return 0 ;;
    esac
    html_to_plaintext < "$override" > "$override.2"
    mv -- "$override.2" "$override"
  }

  # Merge the override with its default when it is a YAML file.
  merge() {
    local override
    override="$(extracted "$1")"
    case "${override##*.}" in
      yaml) merge_yaml "$1" ;;
      *) ;;
    esac
  }

  # Deep-merge ./meta/.default/<name> with the override (override wins) and
  # write the result to ./meta/<name>. Does nothing without a default.
  merge_yaml() {
    local override default
    override="$(extracted "$1")"
    default="${override/.overrides/.default}"
    if [[ ! -f "$default" ]]; then
      return 0
    fi
    yq -j eval - < "$override" | jq . > "$override.json"
    yq -j eval - < "$default" | jq . > "$default.json"
    jq -s '.[0] * .[1]' "$default.json" "$override.json" \
      | yq -P eval - > "./meta/$(basename "$override")"
  }

  # Publish the override under ./meta: merged when merge produced a file,
  # otherwise copied verbatim from ./meta/.overrides.
  import() {
    local target source_path
    target="$(must "$1")"
    if [[ -f "$target" ]]; then
      rm -- "$target"
    fi
    merge "$1"
    if [[ -f "$target" ]]; then
      return 0
    fi
    source_path="${target%/*}/.overrides/${target##*/}"
    log cp -r "$source_path" "$target"
    cp -r -- "$source_path" "$target"
  }

  local listing b64_json json filename stamp

  listing="$(list)" || fatal "meta: could not list remote archives"

  # Records are single base64 words, so plain word splitting is safe here.
  # shellcheck disable=SC2086
  for b64_json in $listing; do
    json="$(base64 --decode <<<"$b64_json" | jq .)"
    filename="$(jq -r .Name <<<"$json")"
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pulling"
    pull "$json"
    log "$filename: extracting"
    extract "$json"
    import "$json"
    # For multi-file archives extract renamed the unpack directory away, so
    # make sure the stamp's parent exists before writing into it.
    stamp="$(meta_file "$json")"
    mkdir -p -- "$(dirname "$stamp")"
    printf '%s\n' "$json" > "$stamp"
  done
)

# Log the message and terminate with status 2.
fatal() {
  log "$@"
  exit 2
}

# Print a timestamped message on stderr.
log() {
  printf '%s > %s\n' "$(date +%H:%M:%S)" "$*" >&2
}

# rclone front end. Usage: gd <subcommand> [args...], where subcommand is
# one of the functions defined below. Runs in a subshell so those names do
# not clash with the helpers of posts/meta.
gd() (
  list_posts() {
    gd list "$RCLONE_FOLDER_POSTS"
  }

  list_meta() {
    gd list "$RCLONE_FOLDER_META"
  }

  # Print one base64 word per remote entry whose JSON record contains
  # `zip"`. Fails when rclone or jq fail; an empty folder is not a failure.
  list() {
    local listing line
    listing="$(rc lsjson "$RCLONE_ALIAS:$RCLONE_FOLDER/$1")" || return
    printf '%s\n' "$listing" \
      | jq -c '.[]' \
      | { grep -E 'zip"' || true; } \
      | while IFS= read -r line; do
          # Some base64 implementations wrap their output; strip the line
          # breaks so each record stays a single word for the callers.
          printf '%s\n' "$line" | base64 | tr -d '\n'
          echo
        done
  }

  pull_posts() {
    pull "$RCLONE_FOLDER_POSTS/$1"
  }

  pull_meta() {
    pull "$RCLONE_FOLDER_META/$1"
  }

  # Copy one remote file into $TMPDIR.
  pull() {
    rc copy "$RCLONE_ALIAS:$RCLONE_FOLDER/$1" "$TMPDIR"/
  }

  # Run rclone with the configured config file and extra options.
  rc() {
    # RCLONE_OPTS is split on whitespace on purpose: it may hold several
    # options.
    # shellcheck disable=SC2086
    rclone \
      --config "$RCLONE_CONFIG" \
      --fast-list \
      $RCLONE_OPTS \
      "$@"
  }

  "$@"
)

# Filter: reduce HTML on stdin to its non-empty text lines.
# NOTE(review): the first two sed patterns were unreadable in the reviewed
# copy (an empty regex and a space-to-space substitution). They are
# reconstructed here as "drop the <style> block" and "&nbsp; to space";
# confirm against the original script.
html_to_plaintext() {
  sed 's/<style.*<\/style>//' \
    | sed 's/&nbsp;/ /g' \
    | sed -e 's/<[^>]*>/\n/g' \
    | grep .
  #sed 's/<[^>]*>//g'
}

# Filter: cut everything before <body and after </body> on each line.
# NOTE(review): the pattern was damaged in the reviewed copy; reconstructed
# from the surviving fragments; confirm against the original script.
html_only_body() {
  sed -e 's/.*<body/<body/' -e 's/<\/body>.*/<\/body>/'
}

if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  main "$@"
fi