#!/bin/bash
#
# Stack the PNG images matching output-*.png (in the current directory) into
# tall "batch" images with ffmpeg, run the easyocr command-line tool on each
# batch image, and append everything easyocr prints to one results file.
#
# Requires: ffmpeg and easyocr on PATH.
# Exit status: 0 when every batch was processed (or there was nothing to
# process), 1 when setup failed or at least one batch failed.

# Variables
BATCH_SIZE=30                   # maximum number of input images per batch
OUTPUT_DIR="batches"            # directory receiving the stacked batch images
RESULTS_FILE="ocr_results.txt"  # accumulated easyocr output for all batches

#######################################
# Stack images vertically into a single image.
# Arguments: $1  - path of the image to write
#            $2+ - input images, in top-to-bottom order
# Returns:   exit status of ffmpeg (or of cp for a single input)
#######################################
stack_images() {
    local out=$1
    shift

    # vstack needs at least two inputs, so a lone image is used as-is.
    if (( $# == 1 )); then
        cp -- "$1" "$out"
        return
    fi

    local -a inputs=()
    local img
    for img in "$@"; do
        inputs+=(-i "$img")
    done

    # vstack is a multi-input filter: every image must be its own -i input and
    # the filter has to go through -filter_complex (not -vf). ffmpeg's vstack
    # also requires all inputs to share the same width and pixel format.
    ffmpeg -y "${inputs[@]}" -filter_complex "vstack=inputs=$#" \
        -frames:v 1 "$out"
}

#######################################
# Build one batch image and OCR it.
# Globals:   RESULTS_FILE (appended to)
# Arguments: $1  - path of the batch image to create
#            $2+ - input images belonging to this batch
# Outputs:   progress line on stdout, errors on stderr
# Returns:   0 on success, 1 if stacking or OCR failed
#######################################
process_batch() {
    local batch_name=$1
    shift

    # Do not OCR a missing or stale image when stacking failed.
    if ! stack_images "$batch_name" "$@"; then
        echo "Error: could not build $batch_name; skipping OCR for it." >&2
        return 1
    fi

    echo "Processing $batch_name..."
    if ! easyocr -l en -f "$batch_name" --gpu True >> "$RESULTS_FILE"; then
        echo "Error: easyocr failed on $batch_name." >&2
        return 1
    fi
}

#######################################
# Entry point: collect the inputs, split them into batches, process each.
# Globals:   BATCH_SIZE, OUTPUT_DIR, RESULTS_FILE (read)
# Outputs:   status messages on stdout, warnings/errors on stderr
# Returns:   0 if all batches succeeded or no inputs exist, 1 otherwise
#######################################
main() {
    local -a files=()
    local file
    local total=0 start=0 batch_index=0 failures=0

    # Create output directory if it does not exist
    mkdir -p "$OUTPUT_DIR" || return 1

    # Clear results file
    : > "$RESULTS_FILE" || return 1

    # Collect the inputs once, in glob expansion order. An unmatched glob is
    # left as the literal pattern, hence the existence check.
    for file in output-*.png; do
        [[ -e "$file" ]] && files+=("$file")
    done
    total=${#files[@]}

    if (( total == 0 )); then
        echo "No files matching output-*.png found; nothing to process." >&2
        return 0
    fi

    # Walk the list in slices of BATCH_SIZE. Batches are numbered
    # sequentially from 1 so a trailing partial batch gets its own file
    # instead of reusing (and overwriting) the previous batch's name.
    for (( start = 0; start < total; start += BATCH_SIZE )); do
        batch_index=$(( batch_index + 1 ))
        process_batch "${OUTPUT_DIR}/batch_${batch_index}.png" \
            "${files[@]:start:BATCH_SIZE}" || failures=$(( failures + 1 ))
    done

    if (( failures > 0 )); then
        echo "OCR finished with $failures failed batch(es); partial results are in $RESULTS_FILE." >&2
        return 1
    fi

    echo "OCR processing complete. Results saved to $RESULTS_FILE."
}

main "$@" || exit 1