#!/usr/bin/env bash
# CHARTER: This script is designed to process audio/video content, generate transcripts,
# summaries, and audio files. The following tasks are defined and must not be altered by any LLM:
# 1. Download content if a URL is provided
# 2. Generate or use existing VTT transcript
# 3. Clean the VTT transcript
# 4. Generate a summary of the transcript
# 5. Create a 16k VBR Opus OGG file for audio tracks (unless audio download is disabled)
# 6. Output both the cleaned VTT text and the summary
# 7. Exclude the WAV file from the results
# 8. Include the OGG file in the results only if both WAV and OGG were created
# This charter is unalterable and defines the core functionality of the script.
#
# Usage: <script> [-f] [-n] [-a] [-d DURATION] <URL | file.vtt | audio-file>
#   -f  summarize with fabric instead of ollama
#   -n  disable audio download / OGG creation
#   -a  enable audio download / OGG creation (default)
#   -d  duration passed to ffmpeg (-t) when converting a local file to WAV

# Make a failing stage (e.g. md5sum) fail the whole pipeline instead of being
# hidden by the last stage.
set -o pipefail

# Configuration (adjust these paths)
WHISPCC="$HOME/work/whisper.cpp" # ./main to run ; ./models for models
MODEL_PATH="$WHISPCC/models/ggml-small.en-tdrz.bin"
OUTPUT_DIR="$HOME/processed_audio"
CACHE_DIR="/tmp/summarize_cache"
# Alternative model: "llama3.1:latest"
OLLAMA_MODEL="deepseek-coder-v2:16b"

# Prompts for different segments
FIRST_PROMPT="Summarize this beginning part of a transcript in one sentence, then provide bullet points with timestamps (00:00:00 sentence)."
MIDDLE_PROMPT="Summarize the key points of this part of the transcript in bullet points with timestamps (00:00:00 sentence)."
LAST_PROMPT="Summarize the main takeaways of this final part of the transcript in bullet points with timestamps (00:00:00 sentence)."

# Global variable to track job queue. Each element is one complete command
# line whose words were shell-quoted by add_job.
JOB_QUEUE=()

# Ensure output and cache directories exist
if ! mkdir -p "$OUTPUT_DIR" "$CACHE_DIR"; then
  echo "Error: Failed to create output or cache directory." >&2
  exit 1
fi

# Parse command line options. The leading ':' selects getopts' silent mode so
# that $OPTARG holds the offending option letter in the error branches.
USE_FABRIC=false
DISABLE_AUDIO=false
DURATION=""
while getopts ":fnad:" opt; do
  case "$opt" in
    f) USE_FABRIC=true ;;
    n) DISABLE_AUDIO=true ;;
    a) DISABLE_AUDIO=false ;;
    d) DURATION="$OPTARG" ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# Function to get MD5 hash of a file
# Arguments: $1 - path of the file to hash
# Outputs:   the hex digest on stdout
# Returns:   non-zero if the file cannot be read or md5sum fails
get_md5() {
  # Reading from stdin keeps md5sum from escaping unusual file names.
  md5sum < "$1" | cut -d' ' -f1
}

# Function to cache a file using hardlinks (atomic)
# Arguments: $1 - file to cache, $2 - extension appended to the cache name
# Outputs:   cache path on stdout, diagnostics on stderr
# Returns:   0 on success, 1 on any failure
cache_file() {
  local INPUT_FILE="$1"
  local EXTENSION="$2"
  local MD5 CACHE_SUBDIR SAFE_FILENAME CACHE_FILE

  # Check if the input file exists and is not empty
  if [ ! -s "$INPUT_FILE" ]; then
    echo "Error: Input file is empty or does not exist." >&2
    return 1
  fi

  if ! MD5=$(get_md5 "$INPUT_FILE") || [ -z "$MD5" ]; then
    echo "Error: Failed to compute MD5 of input file." >&2
    return 1
  fi

  # Two levels of fan-out directories taken from the digest prefix.
  CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
  SAFE_FILENAME=$(printf '%s\n' "$INPUT_FILE" | sed 's/[^a-zA-Z0-9._-]/_/g')
  CACHE_FILE="$CACHE_SUBDIR/${MD5}_${SAFE_FILENAME}${EXTENSION}"

  echo "Cache operation: MD5 sum = $MD5" >&2
  echo "Cache file: $CACHE_FILE" >&2

  # Create cache subdirectory if it doesn't exist
  if ! mkdir -p "$CACHE_SUBDIR"; then
    echo "Error: Failed to create cache subdirectory." >&2
    return 1
  fi

  # Attempt to create the hardlink
  if ln -f "$INPUT_FILE" "$CACHE_FILE"; then
    echo "Cache file created: $CACHE_FILE" >&2
    echo "$CACHE_FILE"
    return 0
  fi
  echo "Error: Failed to create cache file." >&2
  return 1
}

# Function to sanitize a string for use as a filename
# Arguments: $1 - arbitrary string
# Outputs:   lower-cased ASCII string containing only [a-z0-9._-]
sanitize_filename() {
  local STRING="$1"
  printf '%s\n' "$STRING" \
    | iconv -c -t ascii//translit \
    | sed 's/[^A-Za-z0-9._-]/_/g' \
    | tr '[:upper:]' '[:lower:]'
}

# Function to clean text from a VTT file (stdin -> stdout): strips <...> tags,
# squeezes repeated spaces and trims leading/trailing blanks.
clean_text() {
  sed 's/<[^>]*>//g' | tr -s ' ' | sed 's/^[ \t]*//;s/[ \t]*$//'
}

# Run the configured summarizer backend on one piece of text.
# Globals:   USE_FABRIC, OLLAMA_MODEL (read)
# Arguments: $1 - prompt (used by ollama only), $2 - text to summarize
# Outputs:   the backend's output on stdout
# Returns:   the backend's exit status
run_summarizer() {
  local PROMPT="$1"
  local TEXT="$2"
  if [ "$USE_FABRIC" = true ]; then
    fabric -p summarize "$TEXT"
  else
    # Use ollama for summarization
    ollama run "$OLLAMA_MODEL" "$PROMPT" "$TEXT"
  fi
}

# Function to summarize a segment of text
# Arguments: $1 - segment text
#            $2 - prompt
#            $3 - optional output file; when given the result is written there
#                 instead of stdout
# Returns:   0 on success, 1 if the summarizer fails
summarize_segment() {
  local SEGMENT_TEXT="$1"
  local PROMPT="$2"
  local OUTPUT_FILE="${3:-}"
  local SUMMARY_OUTPUT="" LINE_COUNT MD5 CACHE_SUBDIR

  # Count the number of lines in the input
  LINE_COUNT=$(printf '%s\n' "$SEGMENT_TEXT" | wc -l)

  if [ "$LINE_COUNT" -lt 12 ]; then
    # If the input has less than 12 lines, remove cache and return a simple response
    MD5=$(printf '%s\n' "$SEGMENT_TEXT" | md5sum | cut -d' ' -f1)
    CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
    rm -f -- "$CACHE_SUBDIR/$MD5"*
    SUMMARY_OUTPUT="The input is too short for meaningful summarization. Cache entry removed. Here's the original text:
$SEGMENT_TEXT"
  elif ! SUMMARY_OUTPUT=$(run_summarizer "$PROMPT" "$SEGMENT_TEXT" 2>&1); then
    echo "Error in summarization: $SUMMARY_OUTPUT" >&2
    return 1
  fi

  if [ -n "$OUTPUT_FILE" ]; then
    # Write the summary to the specified output file
    printf '%s\n' "$SUMMARY_OUTPUT" > "$OUTPUT_FILE"
  else
    printf '%s\n' "$SUMMARY_OUTPUT"
  fi
}

# Function to add a job to the queue
# Arguments: the command name followed by its arguments, one word each
# Returns:   1 when called without arguments
add_job() {
  local JOB
  [ $# -gt 0 ] || return 1
  # %q shell-quotes every word, so process_job_queue can eval the stored line
  # without file names or URLs being re-interpreted by the shell.
  printf -v JOB '%q ' "$@"
  JOB_QUEUE+=("$JOB")
}

# Function to update the progress bar for a job
# Arguments: $1 - zero-based job index, $2 - total steps,
#            $3 - current step, $4 - message
update_job_progress() {
  local JOB_INDEX="$1"
  local TOTAL_STEPS="$2"
  local CURRENT_STEP="$3"
  local JOB_MESSAGE="$4"
  # ... (Implementation for updating the TUI progress bar)
  # You can use a library like 'whiptail' or 'dialog' for TUI elements
  # Example using echo for now:
  echo "Job $((JOB_INDEX + 1))/${#JOB_QUEUE[@]}: $JOB_MESSAGE ($CURRENT_STEP/$TOTAL_STEPS)"
}

# Function to process the job queue
# NOTE(review): the loop body of this function was lost in the damaged source;
# this implementation runs every queued job in order and keeps going after a
# failure so later jobs still get their chance.
# Returns: 0 if every job succeeded, 1 otherwise
process_job_queue() {
  local JOB_COUNT=${#JOB_QUEUE[@]}
  local i STATUS=0
  echo "Processing job queue ($JOB_COUNT jobs)..."
  for ((i = 0; i < JOB_COUNT; i++)); do
    update_job_progress "$i" 1 0 "Starting"
    # Safe: every word of the job was quoted with printf %q in add_job.
    if eval "${JOB_QUEUE[i]}"; then
      update_job_progress "$i" 1 1 "Finished"
    else
      echo "Error: Job $((i + 1)) failed." >&2
      STATUS=1
    fi
  done
  JOB_QUEUE=()
  return "$STATUS"
}

# Clean a VTT file, split it into three segments and write the summary.
# Helper of process_vtt; all scratch files live in the caller-owned directory.
# Arguments: $1 - VTT file, $2 - existing scratch directory
# Returns:   0 on success, 1 on any failure
_process_vtt_in_dir() {
  local VTT_FILE="$1"
  local TEMP_DIR="$2"
  local BASE_NAME="${TEMP_DIR}/temp" # Temporary base name
  local CLEANED_TRANSCRIPT="${BASE_NAME}_cleaned.txt"
  local VTT_STEM CLEANED_OUTPUT SUMMARY_FILE
  local TOTAL_LINES SEGMENT_SIZE FIRST_SEGMENT MIDDLE_SEGMENT LAST_SEGMENT
  local LABELS PROMPTS SEGMENTS IDX

  VTT_STEM=$(basename -- "$VTT_FILE" .vtt)
  CLEANED_OUTPUT="${OUTPUT_DIR}/${VTT_STEM}_cleaned.txt"
  SUMMARY_FILE="${OUTPUT_DIR}/${VTT_STEM}_summary.txt"

  echo "Processing VTT file: $VTT_FILE"

  # Clean the VTT transcript
  if ! python3 "$(dirname -- "$0")/vttclean.py" "$VTT_FILE" \
    > "$CLEANED_TRANSCRIPT" 2> "${CLEANED_TRANSCRIPT}.error"; then
    echo "Error: Failed to clean the VTT file. Error log:" >&2
    cat "${CLEANED_TRANSCRIPT}.error" >&2
    return 1
  fi

  # Check if the cleaned transcript is empty
  if [ ! -s "$CLEANED_TRANSCRIPT" ]; then
    echo "Error: Cleaned transcript is empty." >&2
    return 1
  fi

  # CHARTER point 6: the cleaned text is a result, so keep a copy outside the
  # scratch directory before that directory is removed.
  if ! cp -- "$CLEANED_TRANSCRIPT" "$CLEANED_OUTPUT"; then
    echo "Error: Failed to save the cleaned transcript." >&2
    return 1
  fi
  echo " - Cleaned transcript: $CLEANED_OUTPUT"

  # Generate summary
  echo "Summarizing transcript..."
  # grep -c '' also counts a final line that lacks a trailing newline.
  TOTAL_LINES=$(grep -c '' "$CLEANED_TRANSCRIPT")
  SEGMENT_SIZE=$((TOTAL_LINES / 3))
  # Fewer than three lines: one line per segment, later segments stay empty.
  if [ "$SEGMENT_SIZE" -lt 1 ]; then
    SEGMENT_SIZE=1
  fi
  FIRST_SEGMENT=$(head -n "$SEGMENT_SIZE" "$CLEANED_TRANSCRIPT")
  MIDDLE_SEGMENT=$(sed -n "$((SEGMENT_SIZE + 1)),$((2 * SEGMENT_SIZE))p" "$CLEANED_TRANSCRIPT")
  # Everything after the middle segment, so remainder lines are not dropped
  # when the line count is not a multiple of three.
  LAST_SEGMENT=$(tail -n "+$((2 * SEGMENT_SIZE + 1))" "$CLEANED_TRANSCRIPT")

  LABELS=(first middle last)
  PROMPTS=("$FIRST_PROMPT" "$MIDDLE_PROMPT" "$LAST_PROMPT")
  SEGMENTS=("$FIRST_SEGMENT" "$MIDDLE_SEGMENT" "$LAST_SEGMENT")

  if ! : > "$SUMMARY_FILE"; then
    echo "Error: Cannot write summary file: $SUMMARY_FILE" >&2
    return 1
  fi
  for IDX in 0 1 2; do
    # Very short transcripts leave trailing segments empty; skip those.
    [ -n "${SEGMENTS[IDX]}" ] || continue
    # The "Generating ..." line goes into the summary file and separates the
    # three sections there.
    if ! {
      echo "Generating summary for ${LABELS[IDX]} segment..."
      run_summarizer "${PROMPTS[IDX]}" "${SEGMENTS[IDX]}"
    } >> "$SUMMARY_FILE"; then
      echo "Error: Summary generation failed." >&2
      return 1
    fi
  done

  if [ ! -s "$SUMMARY_FILE" ]; then
    echo "Error: Summary generation failed." >&2
    return 1
  fi

  echo "Summarization complete."

  # Display the content of the summary file
  echo "Summary content:"
  echo "----------------------------------------"
  cat "$SUMMARY_FILE"
  echo "----------------------------------------"
}

# Function to process a VTT file (generate summary and handle versioning)
# Arguments: $1 - VTT file
#            $2 - source URL or path the VTT came from (accepted from callers,
#                 currently not used)
# Returns:   0 on success, non-zero on failure; the scratch directory is
#            removed on every path
process_vtt() {
  local VTT_FILE="$1"
  local TEMP_DIR STATUS=0

  if ! TEMP_DIR=$(mktemp -d); then
    echo "Error: Failed to create a temporary directory." >&2
    return 1
  fi

  _process_vtt_in_dir "$VTT_FILE" "$TEMP_DIR" || STATUS=$?

  # Clean up
  rm -rf -- "$TEMP_DIR"
  return "$STATUS"
}

# Function to calculate the time difference between two timestamps in HH:MM:SS format
# Arguments: $1, $2 - timestamps; a fractional part after the seconds is ignored
# Outputs:   $1 - $2 in whole seconds (negative if $2 is later than $1)
time_difference() {
  local TIME1="$1" # Format: HH:MM:SS
  local TIME2="$2" # Format: HH:MM:SS
  local H1 M1 S1 H2 M2 S2

  IFS=: read -r H1 M1 S1 <<< "$TIME1"
  IFS=: read -r H2 M2 S2 <<< "$TIME2"
  S1=${S1%%.*}
  S2=${S2%%.*}

  # 10# forces base ten; otherwise fields such as "08" and "09" are rejected
  # as invalid octal numbers.
  local TIME1_TOTAL_SECONDS=$((10#${H1:-0} * 3600 + 10#${M1:-0} * 60 + 10#${S1:-0}))
  local TIME2_TOTAL_SECONDS=$((10#${H2:-0} * 3600 + 10#${M2:-0} * 60 + 10#${S2:-0}))

  echo "$((TIME1_TOTAL_SECONDS - TIME2_TOTAL_SECONDS))"
}

# Run Whisper-CPP on a WAV file and move the transcript to the wanted path.
# Arguments: $1 - WAV file, $2 - destination VTT path
# Returns:   0 on success, 1 on failure
transcribe_wav() {
  local WAV_FILE="$1"
  local VTT_FILE="$2"

  echo "Running Whisper-CPP to generate VTT transcript..."
  if ! "$WHISPCC"/main -ovtt -tdrz -m "$MODEL_PATH" "$WAV_FILE"; then
    echo "Error: Whisper-CPP transcription failed. Check the model path and audio file." >&2
    return 1
  fi
  # The transcript is written next to the input as "<input>.vtt"; rename it so
  # the queued job refers to a file that exists.
  if [ "${WAV_FILE}.vtt" != "$VTT_FILE" ] && ! mv -- "${WAV_FILE}.vtt" "$VTT_FILE"; then
    echo "Error: Transcript not found after transcription: ${WAV_FILE}.vtt" >&2
    return 1
  fi
}

# Convert a WAV file to a 16k VBR Opus OGG file (CHARTER point 5).
# Arguments: $1 - WAV file, $2 - destination OGG path
# Returns:   0 on success, 1 on failure
convert_to_ogg() {
  local WAV_FILE="$1"
  local OGG_FILE="$2"

  echo "Converting WAV to OGG Opus..."
  if ! ffmpeg -i "$WAV_FILE" -c:a libopus -b:a 16k -vbr on -compression_level 10 -y "$OGG_FILE"; then
    echo "Error: Failed to convert to OGG format." >&2
    return 1
  fi
  echo " - Audio: $OGG_FILE"
}

# Handle a URL: prefer downloaded subtitles, otherwise download the audio and
# transcribe it. Queues one process_vtt job on success.
# Arguments: $1 - URL
# Returns:   0 on success, 1 on failure
process_url() {
  local URL="$1"
  local VIDEO_TITLE FINAL_BASE_NAME VTT_FILE WAV_FILE

  echo "Processing as YouTube URL..."

  # Extract the video title
  if ! VIDEO_TITLE=$(yt-dlp --get-title "$URL") || [ -z "$VIDEO_TITLE" ]; then
    echo "Error: Failed to retrieve the video title using yt-dlp." >&2
    return 1
  fi
  FINAL_BASE_NAME=$(sanitize_filename "$VIDEO_TITLE")
  if [ -z "$FINAL_BASE_NAME" ]; then
    echo "Error: Could not derive a file name from the video title." >&2
    return 1
  fi

  # Attempt to download subtitles first (failure just means "no subtitles")
  yt-dlp -N 3 --skip-download --write-auto-sub --sub-lang en \
    --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$URL"

  # Subtitle files carry the language code before the extension
  # (NAME.en.vtt), so match both NAME.vtt and NAME.<lang>.vtt.
  VTT_FILE=$(find "$OUTPUT_DIR" \
    \( -name "${FINAL_BASE_NAME}.vtt" -o -name "${FINAL_BASE_NAME}.*.vtt" \) | head -n 1)

  if [ -n "$VTT_FILE" ]; then
    echo "Subtitles found, processing VTT file..."
    add_job process_vtt "$VTT_FILE" "$URL"
    return 0
  fi

  if [ "$DISABLE_AUDIO" != false ]; then
    # Without subtitles the audio is the only source for a transcript.
    echo "Error: No subtitles found and audio download is disabled (-n); cannot generate a transcript." >&2
    return 1
  fi

  echo "No subtitles found, downloading audio and generating transcript..."
  if ! yt-dlp -N 3 -x --audio-format wav --postprocessor-args "-ar 16k" \
    --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$URL"; then
    echo "Error: Failed to download audio using yt-dlp. Check the URL and your internet connection." >&2
    return 1
  fi

  WAV_FILE=$(find "$OUTPUT_DIR" -name "${FINAL_BASE_NAME}.wav" | head -n 1)
  if [ -z "$WAV_FILE" ]; then
    echo "Error: WAV file not found after download. Check yt-dlp output." >&2
    return 1
  fi

  VTT_FILE="${WAV_FILE%.*}.vtt"
  transcribe_wav "$WAV_FILE" "$VTT_FILE" || return 1
  add_job process_vtt "$VTT_FILE" "$URL"

  # Convert WAV to OGG Opus
  convert_to_ogg "$WAV_FILE" "${WAV_FILE%.wav}.ogg" || return 1

  # Remove the WAV file per CHARTER point 7
  rm -- "$WAV_FILE"
}

# Handle a local audio/video file: convert to WAV if needed, transcribe, and
# optionally create the OGG. Queues one process_vtt job on success.
# Arguments: $1 - path of an existing file
# Returns:   0 on success, 1 on failure
process_local_audio() {
  local INPUT_FILE="$1"
  local STEM="$INPUT_FILE"
  local WAV_FILE VTT_FILE
  local WAV_IS_INTERMEDIATE=false
  local DURATION_ARGS=()

  echo "Processing as local audio file..."

  # Strip the extension only when the file name itself has one, so a dot in a
  # directory name is left alone.
  case "${INPUT_FILE##*/}" in
    *.*) STEM="${INPUT_FILE%.*}" ;;
  esac

  # Convert to WAV first if not already WAV
  if [[ "$INPUT_FILE" != *.wav ]]; then
    WAV_FILE="${STEM}.wav"
    WAV_IS_INTERMEDIATE=true
    if [ -n "$DURATION" ]; then
      DURATION_ARGS=(-t "$DURATION")
    fi
    echo "Converting input to WAV format..."
    if ! ffmpeg -i "$INPUT_FILE" -ar 16000 -ac 1 -c:a pcm_s16le \
      "${DURATION_ARGS[@]}" -y "$WAV_FILE"; then
      echo "Error: Failed to convert input to WAV format." >&2
      return 1
    fi
  else
    WAV_FILE="$INPUT_FILE"
  fi

  VTT_FILE="${WAV_FILE%.wav}.vtt"
  transcribe_wav "$WAV_FILE" "$VTT_FILE" || return 1
  add_job process_vtt "$VTT_FILE" "$INPUT_FILE"

  if [ "$DISABLE_AUDIO" = false ]; then
    # Convert to OGG Opus
    convert_to_ogg "$WAV_FILE" "${WAV_FILE%.*}.ogg" || return 1
  fi

  # Remove the WAV file per CHARTER point 7, but only a WAV this script
  # created; a WAV supplied by the user is their input and must survive.
  if [ "$WAV_IS_INTERMEDIATE" = true ]; then
    rm -- "$WAV_FILE"
  fi
}

# Main script logic: classify the input, queue the work, run the queue.
# Arguments: the positional arguments left after option parsing
main() {
  if [ $# -eq 0 ]; then
    echo "Error: No input provided. Please provide a valid URL, VTT file, or a local audio file." >&2
    exit 1
  fi

  local INPUT="$1"

  if [[ "$INPUT" == *.vtt ]]; then
    echo "Processing as VTT file..."
    add_job process_vtt "$INPUT" "$INPUT"
  elif [[ "$INPUT" =~ ^https?:// ]]; then
    # Anchored on the scheme so a local file whose name merely contains
    # "http" is not mistaken for a URL.
    process_url "$INPUT" || exit 1
  elif [ -f "$INPUT" ]; then
    process_local_audio "$INPUT" || exit 1
  else
    echo "Error: Invalid input. Provide a valid URL, VTT file, or a local audio file." >&2
    exit 1
  fi

  process_job_queue
}

main "$@"