File size: 4,516 Bytes
bd3b6db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from dataclasses import dataclass
from typing import Optional

import cv2
import numpy as np
import numpy.typing as npt
from loguru import logger
from PIL import Image


@dataclass
class ProcessedImage:
    """An image bundled with metadata about the preprocessing applied to it.

    A PIL image passed as ``image`` is converted to an RGB numpy array during
    initialization, and a missing ``preprocessing_history`` becomes a fresh
    empty list, so both attributes can be used directly afterwards.
    """

    # Pixel data (numpy array after initialization).
    image: npt.NDArray
    # Rotation angle reported by a deskew step; None when no angle was recorded.
    angle: Optional[float] = None
    # Defaults to False.
    enhancement_applied: bool = False
    # Names of the steps applied so far; None is replaced by [] on init.
    preprocessing_history: Optional[list[str]] = None

    def __post_init__(self):
        """Normalize the image type and the history list after construction."""
        source = self.image
        if isinstance(source, Image.Image):
            # Force three channels so downstream code sees a consistent layout.
            rgb = source.convert("RGB")
            self.image = np.array(rgb)

        if self.preprocessing_history is None:
            self.preprocessing_history = []


class ImagePreprocessor:
    """Stateless OpenCV-based preprocessing steps.

    Every step takes an image array and returns a ``ProcessedImage``; when the
    step actually transforms the image, its name is recorded in
    ``preprocessing_history``.
    """

    @staticmethod
    def denoise(image: npt.NDArray) -> ProcessedImage:
        """Remove noise with OpenCV's non-local means denoising.

        Three-dimensional arrays are treated as colour images, anything else
        as single-channel.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Denoised image with ``["denoise"]`` as history

        Raises:
            ValueError: If OpenCV fails to denoise the image
        """
        try:
            if len(image.shape) == 3:
                # Positional args: dst, h, hColor, templateWindowSize,
                # searchWindowSize.
                denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            else:
                # Positional args: dst, h, templateWindowSize, searchWindowSize.
                denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

            return ProcessedImage(image=denoised, preprocessing_history=["denoise"])
        except Exception as e:
            logger.error(f"Error during denoising: {e}")
            raise ValueError(f"Failed to denoise image: {e}") from e

    @staticmethod
    def deskew(image: npt.NDArray) -> ProcessedImage:
        """Correct image skew by rotating it by the dominant detected line angle.

        Runs Canny edge detection followed by a standard Hough line transform,
        keeps the lines whose ``theta`` lies within 45 degrees of 0 or 180
        degrees, and rotates the image about its centre by the median of those
        angles (in degrees).

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Deskewed image with rotation angle. When no lines
            (or no lines in the accepted angle range) are found, the input
            image is returned unchanged with ``angle`` 0 and an empty history.

        Raises:
            ValueError: If any step of detection or rotation fails
        """
        try:
            # Convert to grayscale if needed
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image

            # Detect edges, then straight lines as (rho, theta) pairs
            edges = cv2.Canny(gray, 50, 200, apertureSize=3)
            lines = cv2.HoughLines(edges, 1, np.pi / 180, 100)

            if lines is None:
                logger.warning("No lines detected for deskewing")
                return ProcessedImage(image=image, angle=0.0)

            # HoughLines returns shape (N, 1, 2) in OpenCV 3+ (and (1, N, 2) in
            # the legacy 2.x bindings). Flatten to (N, 2) so that *every*
            # detected line contributes; indexing lines[0] would only look at
            # the first line on current OpenCV versions.
            thetas_deg = np.asarray(lines).reshape(-1, 2)[:, 1] * 180 / np.pi

            # Map thetas close to 0 or 180 degrees onto a signed deviation
            # around zero; lines outside those two bands are ignored.
            angles = np.concatenate(
                (
                    thetas_deg[thetas_deg < 45],
                    thetas_deg[thetas_deg > 135] - 180,
                )
            )

            if angles.size == 0:
                return ProcessedImage(image=image, angle=0.0)

            # Plain float keeps the dataclass field free of numpy scalar types.
            median_angle = float(np.median(angles))

            # Rotate image about its centre, keeping the original canvas size
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
            rotated = cv2.warpAffine(
                image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
            )

            return ProcessedImage(
                image=rotated, angle=median_angle, preprocessing_history=["deskew"]
            )
        except Exception as e:
            logger.error(f"Error during deskewing: {e}")
            raise ValueError(f"Failed to deskew image: {e}") from e


@dataclass
class PreprocessingConfig:
    """Switches and optional parameters selecting which preprocessing steps run.

    All fields have defaults, so ``PreprocessingConfig()`` enables denoising,
    deskewing and contrast enhancement and leaves the numeric options unset.
    """

    # Step toggles, all enabled by default.
    denoise: bool = True
    deskew: bool = True
    contrast_enhancement: bool = True

    # Optional numeric settings; None means "not specified".
    threshold: Optional[float] = None
    resize_factor: Optional[float] = None


class ImageHandler:
    """Applies the preprocessing steps selected by a ``PreprocessingConfig``."""

    # File extensions considered supported; not referenced by the methods below.
    _supported_formats = {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}

    def __init__(self):
        """Create the handler together with its ``ImagePreprocessor``."""
        self.preprocessor = ImagePreprocessor()

    def preprocess_image(
        self, image: npt.NDArray, config: Optional[PreprocessingConfig] = None
    ) -> ProcessedImage:
        """Run the enabled preprocessing steps on ``image`` in a fixed order.

        Denoising runs first, then deskewing, each operating on the output of
        the previous step. Only ``config.denoise`` and ``config.deskew`` are
        consulted here; the remaining config fields are not read.

        Args:
            image: Input image as numpy array
            config: Steps to apply. Defaults to ``PreprocessingConfig()`` when
                omitted or None.

        Returns:
            ProcessedImage: Final image, the deskew angle (when deskewing ran)
            and the names of the steps that reported themselves as applied.

        Raises:
            ValueError: If any step fails; the original error is chained as
                the cause.
        """
        if config is None:
            config = PreprocessingConfig()

        try:
            result = ProcessedImage(image=image)

            if config.denoise:
                denoised = self.preprocessor.denoise(result.image)
                result.image = denoised.image
                result.preprocessing_history.extend(denoised.preprocessing_history)

            if config.deskew:
                deskewed = self.preprocessor.deskew(result.image)
                result.image = deskewed.image
                result.angle = deskewed.angle
                # Empty when deskew found nothing to correct, so the history
                # only lists steps that actually changed the image.
                result.preprocessing_history.extend(deskewed.preprocessing_history)

            return result

        except Exception as e:
            logger.error(f"Preprocessing failed: {e}")
            raise ValueError(f"Image preprocessing failed: {e}") from e