Spaces:
Running
Running
File size: 4,516 Bytes
bd3b6db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
from dataclasses import dataclass
from typing import Optional
import cv2
import numpy as np
import numpy.typing as npt
from loguru import logger
from PIL import Image
@dataclass
class ProcessedImage:
    """Container pairing image pixel data with preprocessing bookkeeping.

    A PIL image supplied as ``image`` is converted to an RGB numpy array
    right after construction, and ``preprocessing_history`` always ends up
    as a list (empty when the caller did not supply one).
    """

    # Pixel data; PIL images are replaced by an RGB ndarray in __post_init__.
    image: npt.NDArray
    # Rotation angle recorded by a preprocessing step, if any.
    angle: Optional[float] = None
    # Flag for an enhancement step; defaults to "not applied".
    enhancement_applied: bool = False
    # Names of the steps applied so far; None is normalised to [].
    preprocessing_history: Optional[list[str]] = None

    def __post_init__(self):
        """Normalise the image type and the history list after construction."""
        if isinstance(self.image, Image.Image):
            rgb_image = self.image.convert("RGB")
            self.image = np.array(rgb_image)

        # A None default (instead of a shared mutable default) gives every
        # instance its own fresh list.
        if self.preprocessing_history is None:
            self.preprocessing_history = []
class ImagePreprocessor:
    """Stateless OpenCV-based preprocessing steps for numpy-array images.

    Every public step returns a new ``ProcessedImage``; failures inside a
    step are logged and re-raised as ``ValueError``.
    """

    @staticmethod
    def denoise(image: npt.NDArray) -> ProcessedImage:
        """Denoise an image with OpenCV's fast non-local-means filter.

        Three-dimensional arrays are sent through the colour variant, all
        other arrays through the single-channel variant.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Denoised image with ``["denoise"]`` as history

        Raises:
            ValueError: If denoising fails for any reason
        """
        try:
            if image.ndim == 3:
                # Positional args: dst, h, hColor, templateWindowSize, searchWindowSize
                denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            else:
                # Positional args: dst, h, templateWindowSize, searchWindowSize
                denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
            return ProcessedImage(image=denoised, preprocessing_history=["denoise"])
        except Exception as e:
            logger.error(f"Error during denoising: {str(e)}")
            raise ValueError(f"Failed to denoise image: {str(e)}") from e

    @staticmethod
    def _dominant_angle(lines: npt.NDArray) -> Optional[float]:
        """Return the median skew angle in degrees over all Hough lines.

        ``cv2.HoughLines`` yields one ``(rho, theta)`` pair per line, shaped
        ``(N, 1, 2)`` in current OpenCV Python bindings. Flattening to
        ``(N, 2)`` makes every detected line contribute, whatever the nesting.

        Only lines whose theta is below 45 degrees or above 135 degrees are
        used; the latter are shifted by -180 so both groups centre on zero.
        Returns ``None`` when no line falls in either range.
        """
        thetas = np.asarray(lines, dtype=np.float64).reshape(-1, 2)[:, 1]
        degrees = np.degrees(thetas)
        candidates = np.concatenate(
            (degrees[degrees < 45], degrees[degrees > 135] - 180)
        )
        if candidates.size == 0:
            return None
        return float(np.median(candidates))

    @staticmethod
    def deskew(image: npt.NDArray) -> ProcessedImage:
        """Correct image skew by rotating by the dominant detected line angle.

        Runs Canny edge detection followed by a standard Hough line transform,
        takes the median angle of the qualifying lines and rotates the image
        about its centre by that angle.

        NOTE(review): per OpenCV's theta convention the qualifying lines
        (theta near 0/180 degrees) are near-vertical; near-horizontal lines
        are ignored. Confirm this is the intended skew cue.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Deskewed image with rotation angle in degrees.
                When no usable line is found the input image is returned
                unchanged with ``angle=0`` and an empty history.

        Raises:
            ValueError: If edge/line detection or rotation fails
        """
        try:
            # Convert to grayscale if needed
            # NOTE(review): assumes BGR channel order for 3-channel input - confirm.
            if image.ndim == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image

            # Detect edges, then straight lines (1 px / 1 degree resolution,
            # accumulator threshold of 100 votes)
            edges = cv2.Canny(gray, 50, 200, apertureSize=3)
            lines = cv2.HoughLines(edges, 1, np.pi / 180, 100)
            if lines is None:
                logger.warning("No lines detected for deskewing")
                return ProcessedImage(image=image, angle=0)

            # Calculate dominant angle across ALL detected lines
            median_angle = ImagePreprocessor._dominant_angle(lines)
            if median_angle is None:
                return ProcessedImage(image=image, angle=0)

            # Rotate image about its centre, keeping the original canvas size
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
            rotated = cv2.warpAffine(
                image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
            )
            return ProcessedImage(
                image=rotated, angle=median_angle, preprocessing_history=["deskew"]
            )
        except Exception as e:
            logger.error(f"Error during deskewing: {str(e)}")
            raise ValueError(f"Failed to deskew image: {str(e)}") from e
@dataclass
class PreprocessingConfig:
    """Switches and optional parameters selecting which preprocessing to run."""

    # Step toggles; all enabled by default.
    denoise: bool = True
    deskew: bool = True
    contrast_enhancement: bool = True

    # Optional numeric parameters; None means "not set".
    threshold: Optional[float] = None
    resize_factor: Optional[float] = None
class ImageHandler:
    """Apply the preprocessing steps selected by a ``PreprocessingConfig``."""

    # Recognised file extensions; not consulted by the methods of this class.
    _supported_formats = {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}

    def __init__(self) -> None:
        """Create the handler with its own ``ImagePreprocessor``."""
        self.preprocessor = ImagePreprocessor()

    @staticmethod
    def _absorb(result: ProcessedImage, step: ProcessedImage) -> None:
        """Copy a step's image into ``result`` and append its history entries."""
        result.image = step.image
        result.preprocessing_history.extend(step.preprocessing_history)

    def preprocess_image(
        self, image: npt.NDArray, config: PreprocessingConfig
    ) -> ProcessedImage:
        """Run the enabled steps in order: denoise first, then deskew.

        Only ``config.denoise`` and ``config.deskew`` are acted on here; the
        remaining config fields are not read by this method.

        Args:
            image: Input image as numpy array
            config: Which steps to run

        Returns:
            ProcessedImage: Final image, the deskew angle (when deskew ran)
                and the accumulated history of the steps that modified it

        Raises:
            ValueError: If any step fails; the original error is chained
                as ``__cause__``
        """
        try:
            result = ProcessedImage(image=image)
            if config.denoise:
                self._absorb(result, self.preprocessor.denoise(result.image))
            if config.deskew:
                deskewed = self.preprocessor.deskew(result.image)
                self._absorb(result, deskewed)
                result.angle = deskewed.angle
            return result
        except Exception as e:
            logger.error(f"Preprocessing failed: {str(e)}")
            raise ValueError(f"Image preprocessing failed: {str(e)}") from e
|