#!/bin/bash
#
# Build a subtitled still-image video from a WAV recording.
#
# Steps performed by main():
#   1. Re-encode the input to a mono / 16 kHz WAV next to the original.
#   2. Unless told the transcript already exists, run the `wav_subtitles`
#      cargo example inside ../gitea-whisper-rs (relative to this script).
#   3. Convert ../gitea-whisper-rs/transcript.txt into an .srt file.
#   4. Mux sc.jpg, the original audio and the .srt into an .mkv with ffmpeg.
#
# The file may also be sourced to reuse out_to_srt / cs_to_ts; main only runs
# when the script is executed directly.

# Print a short usage summary on stdout.
usage() {
  cat <<EOF
Usage: ${0##*/} INPUT_WAV [MODEL] [ALREADY_TRANSCRIBED]

  INPUT_WAV            audio file to transcribe and turn into a video
  MODEL                model file (default: ../models/ggml-small.en.bin)
  ALREADY_TRANSCRIBED  "true" to reuse an existing transcript.txt
                       (default: false)
EOF
}

#######################################
# Entry point: transcribe a recording and render it as a subtitled video.
# Arguments:
#   $1 - input audio file
#   $2 - model file (default ../models/ggml-small.en.bin, resolved against
#        the caller's working directory)
#   $3 - "true" to skip transcription and reuse the existing transcript
# Outputs:
#   Writes <input>.mono-16khz.wav, <input>.srt and <input>.mkv next to the
#   input file, then lists the resulting .mkv on stdout.
# Returns:
#   0 on success (or when only usage was requested); any failing step
#   aborts the script because of `set -e`.
#######################################
main() {
  # Strict mode is enabled here rather than at file level so that merely
  # sourcing this file does not change the caller's shell options.
  set -euo pipefail

  # A bare invocation is treated as a request for help instead of dying
  # with an "unbound variable" error on $1.
  if (( $# == 0 )) || [[ "$1" == "-h" || "$1" == "--help" ]]; then
    usage
    return 0
  fi

  local input_wav model already_transcribed script_dir whisper_dir
  local base sanitized_wav

  input_wav="$(realpath "$1")"
  model="$(realpath "${2:-../models/ggml-small.en.bin}")"
  already_transcribed="${3:-false}"

  script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
  whisper_dir="${script_dir}/../gitea-whisper-rs"

  base="${input_wav%.*}"
  sanitized_wav="${base}.mono-16khz.wav"

  ffmpeg -y -i "$input_wav" -ac 1 -ar 16k "$sanitized_wav"

  # Compare as a string: the value comes from the command line and must not
  # be executed as a command.
  if [[ "$already_transcribed" != "true" ]]; then
    # A subshell keeps the caller's working directory untouched and avoids
    # the directory-stack output that pushd/popd write to stdout.
    (
      cd "$whisper_dir"
      cargo run --example wav_subtitles -- "$model" "$sanitized_wav"
    )
  fi

  # The transcript is read from the directory the transcriber ran in
  # (presumably where the example writes transcript.txt - confirm against
  # the wav_subtitles example), not from wherever the caller happens to be.
  out_to_srt "${whisper_dir}/transcript.txt" >"${base}.srt"

  # NOTE: sc.jpg is resolved against the caller's working directory.
  ffmpeg -y \
    -loop 1 -i sc.jpg \
    -i "$input_wav" \
    -i "${base}.srt" \
    -c:v libx264 \
    -tune stillimage \
    -pix_fmt yuv420p -shortest \
    "${base}.mkv"

  ls "${base}.mkv"
}

#######################################
# Format a time given in hundredths of a second as an SRT timestamp.
# Arguments:
#   $1 - time in centiseconds; only the leading run of digits is used, so
#        trailing punctuation such as "250]:" is tolerated and a value
#        without digits counts as 0
# Outputs:
#   HH:MM:SS,mmm on stdout, without a trailing newline
#######################################
cs_to_ts() {
  local digits="${1-}"
  local cs
  digits="${digits%%[!0-9]*}"
  # 10# forces base 10 so values with leading zeros are not read as octal.
  cs=$((10#${digits:-0}))
  printf '%02d:%02d:%02d,%03d' \
    "$((cs / 360000))" \
    "$((cs / 6000 % 60))" \
    "$((cs / 100 % 60))" \
    "$((cs % 100 * 10))"
}

#######################################
# Convert a transcript file into SubRip (.srt) cues.
# Each non-empty input line is expected to look like
#   [<start> - <end>]: <text>
# with <start>/<end> in centiseconds: the first and third whitespace
# separated fields (after dropping "[") are the times, and everything after
# the first ": " is the cue text.
# Arguments:
#   $1 - path of the transcript file
# Outputs:
#   Numbered SRT cues on stdout, each followed by a blank line.
#######################################
out_to_srt() {
  local line stripped start end
  local i=0

  # `read` with the default IFS trims surrounding whitespace; the extra
  # -n test keeps a final line that lacks a trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # Blank lines would otherwise become empty 00:00:00 cues.
    [[ -n "$line" ]] || continue

    i=$((i + 1))
    stripped="${line//\[/}"
    read -r start _ end _ <<<"$stripped"

    printf '%d\n' "$i"
    cs_to_ts "$start"
    printf ' --> '
    cs_to_ts "$end"
    printf '\n%s\n\n' "${line#*: }"
  done <"$1"
}

if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  main "$@"
fi