Luke
committed on
Commit
•
68e1313
1
Parent(s):
03b6d75
no message
Browse files- .gitignore +2 -0
- Preprocess/preprocessImg.py +59 -0
- app.py +54 -12
- requirements.txt +2 -1
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.idea/*
|
2 |
+
*.pyc
|
Preprocess/preprocessImg.py
CHANGED
@@ -27,3 +27,62 @@ def preprocess_image002(image):
|
|
27 |
gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
|
28 |
edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
|
29 |
return Image.fromarray(edged)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
|
28 |
edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
|
29 |
return Image.fromarray(edged)
|
30 |
+
|
31 |
+
|
32 |
+
# Scheme 3: adaptive thresholding + morphological transform
def preprocess_image003(image):
    """Binarize *image* with a Gaussian adaptive threshold, then remove
    small noise specks with a morphological opening.

    Args:
        image: input PIL.Image (any mode; normalized to RGB internally).

    Returns:
        PIL.Image: single-channel binary image.
    """
    # Normalize to RGB so the array layout is known even for grayscale/RGBA
    # uploads, then convert the PIL Image to a numpy array.
    image_np = np.array(image.convert("RGB"))
    # PIL arrays are RGB-ordered, so COLOR_RGB2GRAY is correct here; the
    # original COLOR_BGR2GRAY swapped the red/blue luminance weights.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Adaptive threshold: Gaussian-weighted local mean over an 11x11 window,
    # offset by 2 — robust to uneven lighting across the document.
    adaptive_thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    # Morphological opening with a 3x3 rectangle removes isolated noise dots.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    morph = cv2.morphologyEx(adaptive_thresh, cv2.MORPH_OPEN, kernel)
    return Image.fromarray(morph)
|
44 |
+
|
45 |
+
|
46 |
+
# Scheme 4: CLAHE (Contrast Limited Adaptive Histogram Equalization)
def preprocess_image004(image):
    """Boost local contrast with CLAHE, then binarize with Otsu's method.

    Args:
        image: input PIL.Image (any mode; normalized to RGB internally).

    Returns:
        PIL.Image: single-channel binary image.
    """
    # Normalize to RGB so the array layout is known even for grayscale/RGBA
    # uploads, then convert the PIL Image to a numpy array.
    image_np = np.array(image.convert("RGB"))
    # PIL arrays are RGB-ordered, so COLOR_RGB2GRAY is correct here; the
    # original COLOR_BGR2GRAY swapped the red/blue luminance weights.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Apply CLAHE: equalize contrast per 8x8 tile, clipping the histogram at
    # 2.0 to avoid amplifying noise in near-uniform regions.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_image = clahe.apply(gray)
    # Binarize; THRESH_OTSU picks the global threshold automatically, so the
    # explicit 0 threshold argument is ignored.
    _, binary = cv2.threshold(clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
58 |
+
|
59 |
+
|
60 |
+
# Scheme 5: histogram equalization + Gaussian blur
def preprocess_image005(image):
    """Equalize the global histogram, blur to suppress noise, then binarize
    with Otsu's method.

    Args:
        image: input PIL.Image (any mode; normalized to RGB internally).

    Returns:
        PIL.Image: single-channel binary image.
    """
    # Normalize to RGB so the array layout is known even for grayscale/RGBA
    # uploads, then convert the PIL Image to a numpy array.
    image_np = np.array(image.convert("RGB"))
    # PIL arrays are RGB-ordered, so COLOR_RGB2GRAY is correct here; the
    # original COLOR_BGR2GRAY swapped the red/blue luminance weights.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Global histogram equalization to spread out intensity values.
    equalized = cv2.equalizeHist(gray)
    # 5x5 Gaussian blur (sigma auto-derived) smooths the noise that
    # equalization tends to amplify.
    blurred = cv2.GaussianBlur(equalized, (5, 5), 0)
    # Binarize; THRESH_OTSU picks the global threshold automatically.
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
73 |
+
|
74 |
+
|
75 |
+
# Scheme 6: adaptive denoising + sharpening
def preprocess_image006(image):
    """Denoise with non-local means, sharpen edges, then binarize with
    Otsu's method.

    Args:
        image: input PIL.Image (any mode; normalized to RGB internally).

    Returns:
        PIL.Image: single-channel binary image.
    """
    # Normalize to RGB so the array layout is known even for grayscale/RGBA
    # uploads, then convert the PIL Image to a numpy array.
    image_np = np.array(image.convert("RGB"))
    # PIL arrays are RGB-ordered, so COLOR_RGB2GRAY is correct here; the
    # original COLOR_BGR2GRAY swapped the red/blue luminance weights.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Non-local means denoising: filter strength h=30, 7x7 template patch,
    # 21x21 search window.
    denoised = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # Sharpen with a standard 3x3 Laplacian-style kernel (center 5,
    # 4-neighbors -1) to restore edge contrast lost to denoising.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(denoised, -1, kernel)
    # Binarize; THRESH_OTSU picks the global threshold automatically.
    _, binary = cv2.threshold(sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
app.py
CHANGED
@@ -2,7 +2,10 @@ import os
|
|
2 |
import gradio as gr
|
3 |
from Plan.AiLLM import llm_recognition
|
4 |
from Plan.pytesseractOCR import ocr_recognition
|
5 |
-
from Preprocess.preprocessImg import
|
|
|
|
|
|
|
6 |
|
7 |
# 取得所有語言清單
|
8 |
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
@@ -15,8 +18,18 @@ def preprocess_and_ocr(image, valid_type, language):
|
|
15 |
# 方案二
|
16 |
pre_img_002 = preprocess_image002(image)
|
17 |
ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
return pre_img_001, pre_img_002,
|
|
|
20 |
|
21 |
|
22 |
def preprocess_and_llm(image, valid_type, language):
|
@@ -26,34 +39,63 @@ def preprocess_and_llm(image, valid_type, language):
|
|
26 |
# 方案二
|
27 |
pre_img_002 = preprocess_image002(image)
|
28 |
llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
return pre_img_001, pre_img_002,
|
|
|
31 |
|
32 |
|
33 |
with gr.Blocks() as demo:
|
34 |
with gr.Row():
|
35 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
36 |
-
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
37 |
-
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
|
38 |
-
|
39 |
-
with gr.Row():
|
40 |
validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
|
41 |
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
42 |
-
# preprocessed_type = gr.Radio(["001", "002"], label="解析方案")
|
43 |
|
44 |
with gr.Row():
|
45 |
ocr_button = gr.Button("使用 OCR")
|
46 |
llm_button = gr.Button("使用 AI LLM")
|
47 |
|
48 |
with gr.Row():
|
|
|
49 |
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
|
|
|
|
|
|
|
50 |
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
|
55 |
-
outputs=[
|
|
|
|
|
|
|
|
|
56 |
llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
|
57 |
-
outputs=[
|
|
|
|
|
|
|
|
|
58 |
|
59 |
demo.launch(share=False)
|
|
|
2 |
import gradio as gr
|
3 |
from Plan.AiLLM import llm_recognition
|
4 |
from Plan.pytesseractOCR import ocr_recognition
|
5 |
+
from Preprocess.preprocessImg import (
|
6 |
+
preprocess_image001, preprocess_image002, preprocess_image003,
|
7 |
+
preprocess_image004, preprocess_image005
|
8 |
+
)
|
9 |
|
10 |
# 取得所有語言清單
|
11 |
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
|
|
18 |
# 方案二
|
19 |
pre_img_002 = preprocess_image002(image)
|
20 |
ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
|
21 |
+
# 方案三
|
22 |
+
pre_img_003 = preprocess_image003(image)
|
23 |
+
ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
|
24 |
+
# 方案四
|
25 |
+
pre_img_004 = preprocess_image004(image)
|
26 |
+
ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
|
27 |
+
# 方案五
|
28 |
+
pre_img_005 = preprocess_image005(image)
|
29 |
+
ocr_result_005 = ocr_recognition(pre_img_005, valid_type, language)
|
30 |
|
31 |
+
return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
|
32 |
+
ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)
|
33 |
|
34 |
|
35 |
def preprocess_and_llm(image, valid_type, language):
|
|
|
39 |
# 方案二
|
40 |
pre_img_002 = preprocess_image002(image)
|
41 |
llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
|
42 |
+
# 方案三
|
43 |
+
pre_img_003 = preprocess_image003(image)
|
44 |
+
llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
|
45 |
+
# 方案四
|
46 |
+
pre_img_004 = preprocess_image004(image)
|
47 |
+
llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
|
48 |
+
# 方案五
|
49 |
+
pre_img_005 = preprocess_image005(image)
|
50 |
+
llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
|
51 |
|
52 |
+
return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
|
53 |
+
llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
|
54 |
|
55 |
|
56 |
# Gradio UI: compare five preprocessing schemes side by side, each row
# showing the preprocessed image plus its OCR and LLM recognition results.
with gr.Blocks() as demo:
    # Input controls: uploaded image, document type, OCR language.
    with gr.Row():
        image_input = gr.Image(type="pil", label="上傳圖片")
        validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
        language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")

    with gr.Row():
        ocr_button = gr.Button("使用 OCR")
        llm_button = gr.Button("使用 AI LLM")

    # One row per preprocessing scheme. NOTE: the component order here must
    # match the outputs= lists below and the tuple order returned by
    # preprocess_and_ocr / preprocess_and_llm.
    with gr.Row():
        preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
        ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
        llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
    with gr.Row():
        preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
        ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
        llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")

    with gr.Row():
        preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
        ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
        llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
    with gr.Row():
        preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
        ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
        llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
    with gr.Row():
        preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
        ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
        llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")

    # Wire each button to its handler; outputs receive the handler's return
    # tuple positionally (5 images first, then 5 JSON results).
    ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
                     outputs=[
                         preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
                         preprocess_output_005,
                         ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005
                     ])
    llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
                     outputs=[
                         preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
                         preprocess_output_005,
                         llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005
                     ])

# share=False keeps the app local (no public Gradio tunnel).
demo.launch(share=False)
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ transformers
|
|
4 |
Pillow
|
5 |
torch
|
6 |
huggingface-hub
|
7 |
-
opencv-python
|
|
|
|
4 |
Pillow
|
5 |
torch
|
6 |
huggingface-hub
|
7 |
+
opencv-python
|
8 |
+
numpy
|