-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess.py
111 lines (82 loc) · 3.62 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import cv2 as cv
import string
import image_generator
# Lower bound on contour area / rotated-bounding-box area; find_bboxes only
# draws boxes whose ratio exceeds this value.
MIN_SOLIDITY = 0.6
# Font file passed to image_generator.ImageGenerator when creating the
# per-character template images.
FONT_PATH = "./fonts/EnvoyScript.ttf"
def convert_image_black_white(image):
    """Binarize an image with Otsu's method so the text ends up white.

    The polarity is chosen from the mean gray level: a mostly dark image is
    treated as light text on a dark background and thresholded directly,
    while a mostly light image is treated as dark text on a light background
    and thresholded with inversion. Either way the (presumed) text pixels
    become 255 and the background becomes 0.

    Args:
        image: BGR image array, or an already-grayscale 2-D array.
            NOTE(review): Otsu thresholding presumably requires 8-bit
            data - confirm callers never pass other dtypes.

    Returns:
        The binarized single-channel image produced by ``cv.threshold``.
    """
    # np.ndim (rather than image.ndim) keeps non-array input flowing into
    # cvtColor, so invalid input fails the same way it always did.
    if np.ndim(image) == 2:
        gray = image
    else:
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    if np.mean(gray) < 128:
        # Mostly dark image: light text on a dark background, so pixels
        # above the threshold (the text) map straight to white.
        mode = cv.THRESH_BINARY
    else:
        # Mostly light image: dark text on a light background, so invert
        # to make the text white.
        mode = cv.THRESH_BINARY_INV

    # With THRESH_OTSU the threshold is computed from the histogram and the
    # `thresh` argument is not used, hence the placeholder 0.
    _, im_bw = cv.threshold(gray, 0, 255, mode + cv.THRESH_OTSU)
    return im_bw
def find_bboxes(im_bw):
    """Draw rotated bounding boxes around large, well-filled blobs.

    The image is dilated with a 15x45 rectangular kernel, external contours
    of the dilated result are extracted, and for each contour the minimum
    area (rotated) rectangle is computed. A rectangle is drawn when both of
    its sides exceed 10% of the image's shorter dimension and the contour
    fills more than ``MIN_SOLIDITY`` of the rectangle's area.

    Args:
        im_bw: Image to analyse. It is modified in place: the accepted
            boxes are drawn directly onto it.
            NOTE(review): presumably the single-channel output of
            ``convert_image_black_white``; on a single-channel image the
            multi-component draw color may not render as intended -
            confirm whether a BGR canvas was meant.

    Returns:
        The same ``im_bw`` array, with the boxes drawn on it.
    """
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (15, 45))
    dilated = cv.dilate(im_bw, kernel, iterations=1)

    # findContours returns either 2 or 3 values depending on the OpenCV
    # version; the contour list is the first or the second, respectively.
    found = cv.findContours(dilated, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    # Process contours in order of their bounding rectangle's x coordinate.
    contours = sorted(contours, key=lambda c: cv.boundingRect(c)[0])

    # Size threshold is the same for every contour: 10% of the shorter
    # image side, applied to both width and height of the rotated box.
    image_height, image_width = im_bw.shape[:2]
    min_side = int(0.1 * min(image_width, image_height))

    for contour in contours:
        # Corner points of the rotated bounding box, as integer coordinates.
        box = np.intp(cv.boxPoints(cv.minAreaRect(contour)))

        # Side lengths measured between consecutive corners.
        width = np.linalg.norm(box[0] - box[1])
        height = np.linalg.norm(box[1] - box[2])
        if width <= min_side or height <= min_side:
            continue

        # Fraction of the rotated box actually covered by the contour.
        solidity = cv.contourArea(contour) / (width * height)
        if solidity > MIN_SOLIDITY:
            cv.drawContours(im_bw, [box], 0, (36, 255, 12), 2)

    return im_bw
def find_orientation_with_sift(image, template):
    """Estimate the rotation between ``image`` and ``template`` via SIFT.

    SIFT keypoints of both images are matched with a brute-force matcher,
    filtered with a 0.75 ratio test, and used to fit a RANSAC homography
    from image points to template points. The angle is read from the first
    column of that homography.

    Args:
        image: Query image passed to ``sift.detectAndCompute``.
        template: Reference image passed to ``sift.detectAndCompute``.

    Returns:
        The rotation angle in degrees, or ``None`` when either image yields
        no usable descriptors, fewer than 4 matches pass the ratio test, or
        no homography could be estimated.
    """
    sift = cv.SIFT_create()
    keypoints1, descriptors1 = sift.detectAndCompute(image, None)
    keypoints2, descriptors2 = sift.detectAndCompute(template, None)

    # Descriptors are None when no keypoints were detected, and a k=2
    # search needs at least two candidates on the template side; matching
    # would otherwise raise instead of reporting "no estimate".
    if descriptors1 is None or descriptors2 is None or len(descriptors2) < 2:
        return None

    matches = cv.BFMatcher().knnMatch(descriptors1, descriptors2, k=2)

    # Ratio test: keep a match only if it is clearly better than the
    # runner-up. Entries with fewer than two neighbours are skipped.
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

    # A homography needs at least 4 point correspondences.
    if len(good_matches) < 4:
        return None

    src_pts = np.float32(
        [keypoints1[m.queryIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    dst_pts = np.float32(
        [keypoints2[m.trainIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)

    M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
    if M is None:
        return None

    # Rotation component of the transform, converted from radians to degrees.
    return np.arctan2(M[1, 0], M[0, 0]) * 180 / np.pi
def average_rotation_angle_over_alphabet(image, alphabet):
    """Average the per-character rotation estimates for ``image``.

    For every character in ``alphabet`` a template image is created with
    ``image_generator.ImageGenerator`` (font ``FONT_PATH``, size argument
    40) and compared against ``image`` using
    ``find_orientation_with_sift``. Characters without an estimate are
    ignored; a character that appears more than once is counted once.

    The estimates are combined with a circular mean, because angles wrap
    around at +/-180 degrees: a plain arithmetic mean of 179 and -179 would
    give 0 instead of 180.

    Args:
        image: Image whose rotation is being estimated.
        alphabet: Iterable of characters to build templates for.

    Returns:
        The mean angle in degrees, in the range [-180, 180], or ``None``
        when no character produced an estimate.
    """
    generator = image_generator.ImageGenerator(FONT_PATH, 40)

    angles_by_char = {}
    for char in alphabet:
        template = np.array(generator.createimage(char))
        angle = find_orientation_with_sift(image, template)
        if angle is not None:
            angles_by_char[char] = angle

    if not angles_by_char:
        return None

    # Circular mean: average the unit vectors, then take the angle of the
    # resulting vector.
    radians = np.radians(list(angles_by_char.values()))
    mean_sin = np.mean(np.sin(radians))
    mean_cos = np.mean(np.cos(radians))
    return np.degrees(np.arctan2(mean_sin, mean_cos))
def preprocess(name):
    """Run the preprocessing pipeline on ``./<name>.png``.

    The image is binarized, its average rotation angle over the lowercase
    ASCII alphabet is printed (or ``FAILED`` when no angle could be
    estimated), and the binarized image with bounding boxes drawn on it is
    written to ``<name>_bbox.png``.

    Args:
        name: File name without the ``.png`` extension; the input is read
            relative to the current working directory.

    Raises:
        FileNotFoundError: If the input image cannot be read.
    """
    input_path = f"./{name}.png"
    image = cv.imread(input_path)
    if image is None:
        # cv.imread reports a missing or unreadable file by returning None
        # rather than raising, so fail here with a message naming the path.
        raise FileNotFoundError(f"Could not read image: {input_path}")

    im_bw = convert_image_black_white(image)

    angle = average_rotation_angle_over_alphabet(im_bw, string.ascii_lowercase)
    result = str(angle) if angle is not None else "FAILED"
    print(result)

    # find_bboxes draws onto im_bw, so it runs after the angle estimate.
    bbox_image = find_bboxes(im_bw)
    cv.imwrite(f"{name}_bbox.png", bbox_image)