jnorthrup's picture
Upload 12 files
14daa4c verified
#!/bin/bash
#
# Batch OCR over page images.
#
# Groups the output-*.png files of the current directory into batches of
# BATCH_SIZE, stacks every batch top-to-bottom into a single image under
# OUTPUT_DIR with ffmpeg, runs easyocr on the stacked image and appends the
# recognized text to RESULTS_FILE.
#
# Requires: bash, ffmpeg, easyocr.

set -u

# Settings
BATCH_SIZE=30
OUTPUT_DIR="batches"
RESULTS_FILE="ocr_results.txt"

#######################################
# Stack images top-to-bottom into one image.
# Arguments: $1 - output image path
#            $2... - input images, in stacking order
# Returns:   exit status of the command that wrote the output image
#######################################
stack_images() {
  local out=$1
  shift

  # The vstack filter needs at least two inputs, so a lone image (e.g. a
  # trailing batch of one) is copied as-is.
  if (( $# == 1 )); then
    cp -- "$1" "$out"
    return
  fi

  # One -i per image: vstack is a multi-input filter, so it has to be fed
  # through -filter_complex rather than a single-input -vf graph.
  # ffmpeg's vstack expects all inputs to share width and pixel format.
  local -a inputs=()
  local img
  for img in "$@"; do
    inputs+=(-i "$img")
  done
  ffmpeg -y "${inputs[@]}" -filter_complex "vstack=inputs=$#" "$out"
}

#######################################
# Build one stacked batch image and append its OCR text to RESULTS_FILE.
# Globals:   OUTPUT_DIR (read), RESULTS_FILE (appended to)
# Arguments: $1 - 1-based batch number
#            $2... - images belonging to the batch
# Returns:   non-zero if stacking or OCR failed
#######################################
process_batch() {
  local index=$1
  shift
  local batch_name="${OUTPUT_DIR}/batch_${index}.png"

  # Do not OCR a missing or stale image when stacking failed.
  if ! stack_images "$batch_name" "$@"; then
    printf 'Failed to build %s; skipping OCR for this batch.\n' \
      "$batch_name" >&2
    return 1
  fi

  echo "Processing $batch_name..."
  easyocr -l en -f "$batch_name" --gpu True >> "$RESULTS_FILE"
}

#######################################
# Entry point: split output-*.png into batches and OCR each of them.
# Globals:   BATCH_SIZE, OUTPUT_DIR, RESULTS_FILE (read)
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the batch size is invalid or a batch failed
#######################################
main() {
  if ! (( BATCH_SIZE > 0 )); then
    printf 'BATCH_SIZE must be a positive integer (got %s).\n' \
      "$BATCH_SIZE" >&2
    return 1
  fi

  # Create output directory if it does not exist, and start with an empty
  # results file.
  mkdir -p "$OUTPUT_DIR"
  : > "$RESULTS_FILE"

  # Expand the glob exactly once. When nothing matches, bash leaves the
  # literal pattern in the array, which the -e test detects.
  local -a files=(output-*.png)
  if [[ ! -e "${files[0]}" ]]; then
    printf 'No files matching output-*.png found; nothing to do.\n' >&2
    return 0
  fi

  local total=${#files[@]}
  local start
  local batch_index=0
  local failures=0

  # Walk the list in BATCH_SIZE steps. The batch number is counted
  # separately so that a trailing partial batch gets its own file name
  # instead of reusing the number of the previous full batch.
  for (( start = 0; start < total; start += BATCH_SIZE )); do
    batch_index=$((batch_index + 1))
    process_batch "$batch_index" "${files[@]:start:BATCH_SIZE}" \
      || failures=$((failures + 1))
  done

  if (( failures > 0 )); then
    printf '%d batch(es) failed; results in %s are incomplete.\n' \
      "$failures" "$RESULTS_FILE" >&2
    return 1
  fi

  echo "OCR processing complete. Results saved to $RESULTS_FILE."
}

main "$@"