# hugo/posts.sh — (paste/export artifact header commented out so the file
# parses as shell; the shebang below should be line 1 of the committed file)
# 320 lines, 7.7 KiB, Bash
#! /bin/bash
# Sync post and meta .zip exports from a shared Google Drive folder (via
# rclone) into a hugo site. Every setting below is overridable from the
# environment.
HUGO_POSTS="${HUGO_POSTS:-"./posts"}"  # hugo content dir posts are imported into
RCLONE_ALIAS="${RCLONE_ALIAS:-"hugo"}"  # rclone remote name
RCLONE_CONFIG="${RCLONE_CONFIG:-./rclone.conf}"  # rclone config file path
RCLONE_FOLDER="${RCLONE_FOLDER:-"shared_from_bbarl64"}"  # shared Drive folder
RCLONE_FOLDER_POSTS="${RCLONE_FOLDER_POSTS:-"posts"}"  # subfolder with post .zip exports
RCLONE_FOLDER_META="${RCLONE_FOLDER_META:-"meta"}"  # subfolder with meta .zip exports
RCLONE_OPTS="${RCLONE_OPTS:-"--drive-shared-with-me"}"  # extra rclone flags (word-split on use)
TMPDIR="${TMPDIR:-"/tmp"}"  # scratch dir for pulled archives
# Entry point: set up the shell environment, then sync posts and meta
# overrides from Drive, in that order.
main() {
  flags "$@"
  posts
  meta
}
# One-time shell setup: strict error modes, then run from the script's own
# directory so the relative defaults (./posts, ./meta, ./rclone.conf)
# resolve regardless of the caller's cwd. Arguments are currently ignored.
flags() {
  set -e
  set -o pipefail
  # ${BASH_SOURCE[0]} instead of bare $BASH_SOURCE: same value, but explicit
  # about indexing the array. A failed cd aborts the script via set -e.
  cd "$(dirname "${BASH_SOURCE[0]}")"
}
# Sync Drive post archives into $HUGO_POSTS and prune posts removed
# upstream. Body is a subshell so the helper definitions stay contained.
# Each helper takes the decoded lsjson entry (a JSON object) as $1.
posts() (
  # One base64 token per remote .zip (see gd list).
  list() {
    gd list_posts
  }
  # 0 iff the stored marker matches the current listing entry exactly.
  up_to_date() {
    local metadatad="$(metadatad "$1")"
    if [ -f "$metadatad" ]; then
      if [ "$1" == "$(cat "$metadatad")" ]; then
        return 0
      fi
    fi
    return 1
  }
  # Local path the archive lands at after pull.
  pulled() {
    echo "$TMPDIR/$(echo "$1" | jq -r .Name)"
  }
  # Download the archive named by the entry.
  pull() {
    # Fix: read the argument, not the caller's $json (the original only
    # worked via bash dynamic scoping).
    local filename="$(echo "$1" | jq -r .Name)"
    gd pull_posts "$filename"
  }
  # Unpack dir: $TMPDIR/YYYY-MM-DD_<sanitized archive basename>.
  extracted() {
    local pulled_file="$(pulled "$1")"
    local date="$(echo "$1" | jq -r .ModTime)"
    date="${date%%T*}"  # keep only the date part of the ISO timestamp
    local pulled_file_safe_basename="$(basename "$pulled_file" | sed 's/\.zip$//' | sed 's/[^a-zA-Z0-9]/_/g')"
    echo "$TMPDIR/${date}_${pulled_file_safe_basename}"
  }
  # Unpack the pulled archive and turn it into a hugo post directory.
  extract() {
    local extracted_dir="$(extracted "$1")"
    local pulled_file="$(pulled "$1")"
    local u_date_iso="$(echo "$1" | jq -r .ModTime)"
    if [ -d "$extracted_dir" ]; then
      rm -rf "$extracted_dir"
    fi
    mkdir -p "$extracted_dir"
    7z x -o"$extracted_dir" "$pulled_file"
    # title = archive name without .zip
    extracted_zip_to_post "$(echo "$1" | jq -r .Name | sed 's/\.zip$//')" "$extracted_dir" "$u_date_iso"
  }
  # Final location of the post inside $HUGO_POSTS.
  imported() {
    local extracted_dir="$(extracted "$1")"
    echo "$HUGO_POSTS/$(basename "$extracted_dir")"
  }
  # Move the extracted post into place, replacing any previous version.
  import() {
    local extracted_dir="$(extracted "$1")"
    local target_dir="$(imported "$1")"
    if [ -d "$target_dir" ]; then
      rm -rf "$target_dir"
    fi
    mv "$extracted_dir" "$target_dir"
  }
  # Path of the per-post change-detection marker.
  metadatad() {
    local imported="$(imported "$1")"
    echo "$imported"/.metadata.json
  }
  # Record the listing entry so up_to_date can skip unchanged posts.
  metadata() {
    local metadatad="$(metadatad "$1")"
    echo "$1" > "$metadatad"
  }
  local want=()
  # Word-splitting $(list) is intentional: one base64 token per entry.
  for b64_json in $(list); do
    local json="$(echo "$b64_json" | base64 --decode)"
    local filename="$(echo "$json" | jq -r .Name)"
    want+=("$(imported "$json")")
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pull"
    pull "$json"
    log "$filename: extract"
    extract "$json"
    log "$filename: import"
    import "$json"
    log "$filename: metadata"
    metadata "$json"
    rm "$(pulled "$json")"
  done
  # Prune local posts that no longer exist upstream.
  for d in "$HUGO_POSTS"/*; do
    if [[ ! " ${want[*]} " =~ " $d " ]]; then
      log "$d: stale, pruning"
      rm -rf "$d"
    fi
  done
)
# Sync meta override files from Drive into ./meta, merging .yaml overrides
# onto ./meta/.default/* where a default exists. Each helper takes the
# decoded (pretty-printed) lsjson entry as $1.
meta() {
  list() {
    gd list_meta
  }
  # Download the archive named by the entry.
  pull() {
    # Fix: read the argument, not the caller's $json (the original only
    # worked via bash dynamic scoping).
    local filename="$(echo "$1" | jq -r .Name)"
    gd pull_meta "$filename"
  }
  pulled() {
    echo "$TMPDIR/$(echo "$1" | jq -r .Name)"
  }
  # Marker file storing the listing entry for change detection.
  meta_file() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name)/.meta"
  }
  # Up to date iff the marker matches AND the installed artifact exists.
  up_to_date() {
    test -f "$(meta_file "$1")" && test "$1" == "$(cat "$(meta_file "$1")")" && test -e "$(must "$1")"
  }
  # Unpack directory (name keeps the .zip suffix).
  extracted_dir() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name)"
  }
  # Extracted artifact, .zip suffix stripped.
  extracted() {
    echo "./meta/.overrides/$(echo "$1" | jq -r .Name | sed 's/\.zip$//')"
  }
  # Where the final (possibly merged) file must end up.
  must() {
    echo "./meta/$(basename "$(extracted "$1")")"
  }
  extract() {
    local extracted_dir="$(extracted_dir "$1")"
    rm -rf "$extracted_dir" "${extracted_dir%.zip}"
    mkdir -p "$extracted_dir"
    7z x -o"$extracted_dir" "$(pulled "$1")"
    local extracted_file="$(ls "$extracted_dir"/*.html)"
    # One file in the archive: the html IS the artifact. Several files:
    # treat as a directory artifact whose html becomes index.md.
    if [ "$(ls "$extracted_dir" | wc -l)" == 1 ]; then
      cp "$extracted_file" "$(extracted "$1")"
      extracted_file="$(extracted "$1")"
    else
      mv "$extracted_file" "$(dirname "$extracted_file")"/index.md
      cp -r "$extracted_dir" "$(extracted "$1")"
    fi
    local extracted="$(extracted "$1")"
    # Only .yaml artifacts get de-HTML-ed; everything else is kept as-is.
    case "${extracted##*.}" in
      yaml ) true ;;
      * ) return ;;
    esac
    cat "$extracted" | html_to_plaintext > "$extracted.2"; mv "$extracted.2" "$extracted"
  }
  # Dispatch on artifact extension; only yaml has a merge strategy.
  merge() {
    local extracted="$(extracted "$1")"
    case "${extracted##*.}" in
      yaml ) merge_yaml "$1" ;;
      * )
    esac
  }
  # default (.default/<name>) deep-merged under the override, result
  # written to ./meta/<name>. No default => no merge (import copies raw).
  merge_yaml() {
    local override="$(extracted "$1")"
    local default="$(echo "$override" | sed 's/\.overrides/\.default/')"
    if [ ! -f "$default" ]; then
      return
    fi
    # NOTE(review): 'yq -j eval -' is mikefarah-yq v3/v4-era syntax; confirm
    # against the installed yq version (v4 also supports -o=json).
    cat "$override" | yq -j eval - | jq > "$override.json"
    cat "$default" | yq -j eval - | jq > "$default.json"
    jq -s '.[0] * .[1]' "$default.json" "$override.json" | yq -P eval - > ./meta/"$(basename "$override")"
    rm "$override.json" "$default.json"
  }
  # Install the artifact: merged result if merge produced one, otherwise a
  # straight copy of the override.
  import() {
    local must="$(must "$1")"
    if [ -e "$must" ]; then
      rm -rf "$must"
    fi
    merge "$1"
    if [ -e "$must" ]; then
      return
    fi
    cp -r "${must%/*}/.overrides/${must##*/}" "$must"
  }
  # Word-splitting $(list) is intentional: one base64 token per entry.
  for b64_json in $(list); do
    # jq . pretty-prints; up_to_date compares against this exact form.
    local json="$(echo "$b64_json" | base64 --decode | jq .)"
    local filename="$(echo "$json" | jq -r .Name)"
    if up_to_date "$json"; then
      log "$filename: up to date"
      continue
    fi
    log "$filename: pulling"
    pull "$json"
    log "$filename: extracting"
    extract "$json"
    import "$json"
    echo "$json" > "$(meta_file "$json")"
  done
}
# Log the message to stderr (via log) and abort the script with status 2.
fatal() {
log "$@"
exit 2
}
# Timestamped logger: writes "HH:MM:SS > message" to stderr.
log() {
  printf '%s > %s\n' "$(date +%H:%M:%S)" "$*" >&2
}
# Thin dispatcher around rclone; "$@" at the bottom invokes one of the
# helpers below (e.g. `gd list_posts`, `gd pull_posts name.zip`). Runs in
# a subshell so the helper definitions stay contained.
gd() (
  list_posts() {
    gd list "$RCLONE_FOLDER_POSTS"
  }
  list_meta() {
    gd list "$RCLONE_FOLDER_META"
  }
  # Emit one base64 token per remote .zip entry so callers can safely
  # word-split the output and decode each token independently.
  list() {
    rc lsjson "$RCLONE_ALIAS:$RCLONE_FOLDER/$1" \
      | jq -c .[] \
      | grep -E '\.zip"' \
      | while read -r line; do
          # Fix 1 (above): '\.zip"' — the old 'zip"' also matched names
          # merely ending in "zip" (e.g. "gzip").
          # Fix 2: strip base64's default 76-column line wrapping; a
          # wrapped token would be split into undecodable fragments by
          # the callers' $(list) word splitting.
          echo "$line" | base64 | tr -d '\n'
          echo
        done
  }
  pull_posts() {
    pull "$RCLONE_FOLDER_POSTS/$1"
  }
  pull_meta() {
    pull "$RCLONE_FOLDER_META/$1"
  }
  pull() {
    rc copy "$RCLONE_ALIAS:$RCLONE_FOLDER/$1" "$TMPDIR"/
  }
  rc() {
    rclone \
      --config "$RCLONE_CONFIG" \
      --fast-list \
      $RCLONE_OPTS \
      "$@"
  }
  # $RCLONE_OPTS is intentionally unquoted: it may hold several flags.
  "$@"
)
# Best-effort html -> text filter (line-oriented): drop inline
# <style>...</style> runs, unescape &nbsp;, break on every remaining tag,
# then drop blank lines. Note: \n in the replacement is GNU sed.
html_to_plaintext() {
  sed -e 's/<style.*<\/style>//' \
      -e 's/&nbsp;/ /g' \
      -e 's/<[^>]*>/\n/g' \
    | grep .
}
# Trim an html stream down to just the <body ...>...</body> element
# (line-oriented: everything before <body and after </body> is dropped).
html_only_body() {
  sed -e 's/.*<body/<body/' \
      -e 's/<\/body>.*/<\/body>/'
}
# Turn an unpacked Drive export into a hugo post:
#   $1 title, $2 dir containing the extracted *.html, $3 ISO mod time.
# Writes $2/index.md (front matter + body html), deletes the html sources,
# and dithers images in place. Requires ImageMagick's `convert`.
extracted_zip_to_post() {
local title="$1"
local extracted_zip="$2"
local u_date_iso="$3"
local index_md="$extracted_zip/index.md"
local index_html="$extracted_zip/.index.html"
# concatenate all exported html, keeping only the <body>...</body> span
cat "$extracted_zip"/*.html | html_only_body > "$index_html"
# hashtags found in the text become hugo tags (deduped, '#' stripped);
# unquoted expansion into the array is intentional word splitting
local tags=($(cat "$index_html" | html_to_plaintext | grep -o '#[a-zA-Z0-9]*' | grep '[a-zA-Z]' | sed 's/^#//' | sort -u))
# join tags with ", "; despite its name, $first means "a tag was already
# printed", so the separator precedes every tag but the first
local tags_csv="$(first=false; for tag in "${tags[@]}"; do true; if $first; then echo -n ", "; fi; first=true; echo -n "$tag"; done)"
# front matter; the sed below strips any leading spaces, grep drops the
# blank first line produced by the format string
printf '
---
title: "%s"
date: %s
draft: false
tags: [%s]
---
' \
"$(echo "$title" | sed 's/"/\\"/g')" \
"$u_date_iso" \
"$tags_csv" \
| sed 's/^[ ]*//' \
| grep . \
> "$index_md"
# body html is appended verbatim after the front matter
cat "$index_html" >> "$index_md"
echo "" >> "$index_md"
# .index.html is a dotfile, so it needs removing explicitly beside *.html
rm "$index_html" "$extracted_zip"/*.html
# shrink images with ordered dithering (subshell loop is fine: no
# variables need to survive the pipeline)
for ext in png jpg jpeg gif JPG; do find "$extracted_zip" -name "*.$ext"; done | while read -r line; do
convert "$line" -ordered-dither o8x8,8,8,4 "$line.2"
mv "$line.2" "$line"
done
}
# Run main only when executed directly, not when sourced.
if [ "${BASH_SOURCE[0]}" == "$0" ]; then
  main "$@"
fi