Spaces:
Running
Running
Create processor.py
Browse files- processor.py +138 -0
processor.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import numpy.typing as npt
|
7 |
+
from loguru import logger
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
|
11 |
+
@dataclass
class ProcessedImage:
    """Container for an image together with metadata about its processing.

    A PIL image passed as ``image`` is converted to an RGB numpy array, and
    a missing ``preprocessing_history`` is replaced by a fresh empty list,
    so each instance owns its own history.
    """

    # Pixel data; stored as a numpy array once __post_init__ has run.
    image: npt.NDArray
    # Rotation angle recorded by the step that produced this result, if any.
    angle: Optional[float] = None
    enhancement_applied: bool = False
    # Names of the steps applied so far; None becomes [] in __post_init__.
    preprocessing_history: Optional[list[str]] = None

    def __post_init__(self):
        """Normalise the image type and the history list after init."""
        pixels = self.image
        if isinstance(pixels, Image.Image):
            self.image = np.array(pixels.convert("RGB"))

        history = self.preprocessing_history
        self.preprocessing_history = [] if history is None else history
|
24 |
+
|
25 |
+
|
26 |
+
class ImagePreprocessor:
    """Stateless image clean-up steps, each returning a ``ProcessedImage``."""

    @staticmethod
    def denoise(image: npt.NDArray) -> ProcessedImage:
        """Remove noise with OpenCV's non-local-means denoiser.

        Three-dimensional arrays go through the colour variant, everything
        else through the single-channel variant.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Denoised image with "denoise" in its history

        Raises:
            ValueError: If OpenCV fails to denoise the image
        """
        try:
            if image.ndim == 3:
                denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            else:
                denoised = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

            return ProcessedImage(image=denoised, preprocessing_history=["denoise"])
        except Exception as e:
            logger.error(f"Error during denoising: {str(e)}")
            raise ValueError(f"Failed to denoise image: {str(e)}") from e

    @staticmethod
    def deskew(image: npt.NDArray) -> ProcessedImage:
        """Correct image skew by detecting the dominant line angle and rotating.

        Runs Canny edge detection followed by a Hough line transform, takes
        the median angle of the accepted lines and rotates the image by that
        angle about its centre.

        Args:
            image: Input image as numpy array

        Returns:
            ProcessedImage: Deskewed image with rotation angle. When no usable
            lines are found the input image is returned unchanged with an
            angle of 0 and an empty history.

        Raises:
            ValueError: If angle detection or rotation fails
        """
        try:
            # Convert to grayscale if needed
            if image.ndim == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image

            # Detect edges, then straight lines through them
            edges = cv2.Canny(gray, 50, 200, apertureSize=3)
            lines = cv2.HoughLines(edges, 1, np.pi / 180, 100)

            if lines is None:
                logger.warning("No lines detected for deskewing")
                return ProcessedImage(image=image, angle=0.0)

            # HoughLines yields one (rho, theta) pair per line wrapped in an
            # extra axis; indexing lines[0] would visit only a single line on
            # current OpenCV. Flattening to (N, 2) visits every line.
            angles = []
            for _rho, theta in lines.reshape(-1, 2):
                angle = theta * 180 / np.pi
                # Keep only lines within 45 degrees of theta == 0 / 180 and
                # fold the upper range onto negative angles.
                if angle < 45:
                    angles.append(angle)
                elif angle > 135:
                    angles.append(angle - 180)

            if not angles:
                return ProcessedImage(image=image, angle=0.0)

            # Median is robust against a few stray lines
            median_angle = float(np.median(angles))

            # Rotate image about its centre, keeping the original size
            (h, w) = image.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
            rotated = cv2.warpAffine(
                image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
            )

            return ProcessedImage(
                image=rotated, angle=median_angle, preprocessing_history=["deskew"]
            )
        except Exception as e:
            logger.error(f"Error during deskewing: {str(e)}")
            raise ValueError(f"Failed to deskew image: {str(e)}") from e
|
98 |
+
|
99 |
+
|
100 |
+
@dataclass
class PreprocessingConfig:
    """Switches and optional parameters selecting preprocessing steps.

    The three boolean flags default to enabled; the two numeric options
    default to ``None`` (unset).
    """

    # Step toggles
    denoise: bool = True
    deskew: bool = True
    contrast_enhancement: bool = True

    # Optional numeric parameters; None means "not configured"
    threshold: Optional[float] = None
    resize_factor: Optional[float] = None
|
109 |
+
|
110 |
+
|
111 |
+
class ImageHandler:
    """Apply the preprocessing steps selected by a ``PreprocessingConfig``."""

    _supported_formats = {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}

    def __init__(self):
        self.preprocessor = ImagePreprocessor()

    def preprocess_image(
        self, image: npt.NDArray, config: PreprocessingConfig
    ) -> ProcessedImage:
        """Run the enabled preprocessing steps and collect their results.

        Only ``config.denoise`` and ``config.deskew`` are consulted here.
        Denoising runs first, then deskewing on the denoised image.

        Args:
            image: Input image
            config: Flags selecting which steps to run

        Returns:
            ProcessedImage: Final image, the deskew angle (if deskewing ran)
            and the accumulated step history

        Raises:
            ValueError: If any step fails; the original error is chained
        """
        try:
            result = ProcessedImage(image=image)

            if config.denoise:
                denoised = self.preprocessor.denoise(result.image)
                result.image = denoised.image
                result.preprocessing_history.extend(denoised.preprocessing_history)

            if config.deskew:
                deskewed = self.preprocessor.deskew(result.image)
                result.image = deskewed.image
                result.angle = deskewed.angle
                result.preprocessing_history.extend(deskewed.preprocessing_history)

            return result

        except Exception as e:
            logger.error(f"Preprocessing failed: {str(e)}")
            # Chain explicitly so the traceback shows the failing step as cause
            raise ValueError(f"Image preprocessing failed: {str(e)}") from e
|