Spaces:
Running
Running
from dataclasses import dataclass | |
from typing import Optional | |
import cv2 | |
import numpy as np | |
import numpy.typing as npt | |
from loguru import logger | |
from PIL import Image | |
@dataclass
class ProcessedImage:
    """Image data bundled with metadata about the preprocessing applied to it.

    Attributes:
        image: Pixel data as a numpy array. A PIL image passed here is
            converted to an RGB numpy array during initialization.
        angle: Rotation angle associated with the image, or None when no
            angle has been recorded.
        enhancement_applied: Flag that defaults to False.
        preprocessing_history: Names of the preprocessing steps applied so
            far. None is replaced by a fresh empty list on initialization,
            so instances never share a history list.
    """

    image: npt.NDArray
    angle: Optional[float] = None
    enhancement_applied: bool = False
    preprocessing_history: Optional[list[str]] = None

    def __post_init__(self):
        """Normalize the history list and convert PIL input to a numpy array."""
        if self.preprocessing_history is None:
            self.preprocessing_history = []

        # Already an array: nothing to convert, skip the PIL type check.
        if isinstance(self.image, np.ndarray):
            return

        if isinstance(self.image, Image.Image):
            self.image = np.array(self.image.convert("RGB"))
class ImagePreprocessor:
    """Stateless image clean-up operations built on OpenCV.

    Both operations are static methods: they take the image as their only
    argument and can be called on the class or on an instance.
    """

    @staticmethod
    def denoise(image: npt.NDArray) -> ProcessedImage:
        """Reduce image noise using OpenCV non-local-means denoising.

        Args:
            image: Input image as numpy array. Arrays with three dimensions
                go through the colored variant, all others through the
                single-channel variant.

        Returns:
            ProcessedImage: Denoised image with "denoise" in its history.

        Raises:
            ValueError: If denoising fails for any reason.
        """
        try:
            if len(image.shape) == 3:
                # h=10, hColor=10, templateWindowSize=7, searchWindowSize=21
                denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            else:
                # h=10, templateWindowSize=7, searchWindowSize=21
                denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
            return ProcessedImage(image=denoised, preprocessing_history=["denoise"])
        except Exception as e:
            logger.error(f"Error during denoising: {str(e)}")
            raise ValueError(f"Failed to denoise image: {str(e)}") from e

    @staticmethod
    def deskew(image: npt.NDArray) -> ProcessedImage:
        """Correct image skew by estimating a dominant angle and rotating.

        Runs Canny edge detection followed by a standard Hough line
        transform, takes the median of the accepted line angles and rotates
        the image about its center by that angle.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Deskewed image with rotation angle. When no
                lines (or no lines in the accepted angle range) are found,
                the input image is returned unchanged with angle 0.0 and an
                empty history.

        Raises:
            ValueError: If angle detection or rotation fails
        """
        try:
            # Convert to grayscale if needed
            # NOTE(review): uses BGR ordering; confirm the channel order callers pass.
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image

            # Detect edges, then straight lines (rho step 1px, theta step 1 degree)
            edges = cv2.Canny(gray, 50, 200, apertureSize=3)
            lines = cv2.HoughLines(edges, 1, np.pi / 180, 100)
            if lines is None:
                logger.warning("No lines detected for deskewing")
                return ProcessedImage(image=image, angle=0.0)

            # HoughLines yields one (rho, theta) pair per detected line in an
            # array of shape (N, 1, 2); flatten so every line contributes,
            # not just the first one.
            angles = []
            for _, theta in lines.reshape(-1, 2):
                angle = theta * 180 / np.pi
                # Fold angles near 0 and near 180 degrees into (-45, 45);
                # everything in between is ignored.
                # NOTE(review): theta is the angle of the line's normal, so
                # this keeps near-vertical lines - confirm that is intended.
                if angle < 45:
                    angles.append(angle)
                elif angle > 135:
                    angles.append(angle - 180)
            if not angles:
                return ProcessedImage(image=image, angle=0.0)

            # Median is robust against a few stray lines; store a plain float.
            median_angle = float(np.median(angles))

            # Rotate image about its center, keeping the original canvas size
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
            rotated = cv2.warpAffine(
                image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
            )
            return ProcessedImage(
                image=rotated, angle=median_angle, preprocessing_history=["deskew"]
            )
        except Exception as e:
            logger.error(f"Error during deskewing: {str(e)}")
            raise ValueError(f"Failed to deskew image: {str(e)}") from e
@dataclass
class PreprocessingConfig:
    """Configuration for image preprocessing steps.

    Every field has a default, so ``PreprocessingConfig()`` gives the
    standard pipeline and individual switches can be overridden by keyword,
    e.g. ``PreprocessingConfig(deskew=False)``.

    Attributes:
        denoise: Whether to run the denoising step.
        deskew: Whether to run the deskewing step.
        contrast_enhancement: Whether contrast enhancement is requested.
        threshold: Optional threshold value; None leaves it unset.
        resize_factor: Optional resize factor; None leaves it unset.
    """

    denoise: bool = True
    deskew: bool = True
    contrast_enhancement: bool = True
    threshold: Optional[float] = None
    resize_factor: Optional[float] = None
class ImageHandler:
    """Runs the configured preprocessing steps over an image."""

    # File extensions this handler recognizes.
    _supported_formats = {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}

    def __init__(self):
        """Create the handler together with its own ImagePreprocessor."""
        self.preprocessor = ImagePreprocessor()

    def preprocess_image(
        self, image: npt.NDArray, config: PreprocessingConfig
    ) -> ProcessedImage:
        """Apply the enabled preprocessing steps in order: denoise, then deskew.

        Only ``config.denoise`` and ``config.deskew`` are read here. Each
        enabled step receives the output of the previous one, and its
        history entries are appended to the result.

        Args:
            image: Input image as numpy array
            config: Switches selecting which steps to run

        Returns:
            ProcessedImage: Final image, the deskew angle (when deskew ran)
                and the accumulated preprocessing history.

        Raises:
            ValueError: If any step fails
        """
        try:
            outcome = ProcessedImage(image=image)

            if config.denoise:
                step = self.preprocessor.denoise(outcome.image)
                outcome.image = step.image
                outcome.preprocessing_history.extend(step.preprocessing_history)

            if config.deskew:
                step = self.preprocessor.deskew(outcome.image)
                outcome.image = step.image
                outcome.angle = step.angle
                outcome.preprocessing_history.extend(step.preprocessing_history)
        except Exception as exc:
            logger.error(f"Preprocessing failed: {exc!s}")
            raise ValueError(f"Image preprocessing failed: {exc!s}")
        else:
            return outcome