Created
December 13, 2018 11:09
-
-
Save NicoLivesey/c1e13d89d36ed4746ea366b5f0f27c27 to your computer and use it in GitHub Desktop.
Simple baseline to apply OCR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def remove_clear(img, degree):
    """Binarize a BGR image ahead of the text-detection step.

    The image is converted to grayscale, thresholded adaptively and then
    cleaned with a morphological opening, so that the text-detection
    algorithm receives a well-thresholded image.

    Args:
        img: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        degree: Not used by the current implementation; kept so existing
            callers keep working.

    Returns:
        The thresholded and opened single-channel image.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Gaussian-weighted adaptive threshold: max value 255, block size 65,
    # constant 11 subtracted from the weighted mean.
    binary = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        65,
        11,
    )

    # 3x3 rectangular opening to clean up the thresholded result.
    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.morphologyEx(binary, cv2.MORPH_OPEN, opening_kernel)
def detect_text(img, thresh):
    """Locate text-like regions from the image gradient and mask ``thresh``.

    The morphological gradient of the grayscale image is binarized with
    Otsu's threshold and closed horizontally so neighbouring strokes merge
    into blobs. Each external contour whose bounding box passes the
    fill-ratio and size filters is painted into a mask; ``thresh`` is kept
    inside that mask and everything outside is set to white (255).

    Args:
        img: BGR image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        thresh: Single-channel image with the same height/width as ``img``
            (in this file, the output of ``remove_clear``).

    Returns:
        A ``(res, score)`` tuple where ``res`` is ``thresh`` restricted to
        the selected boxes on a white background, and ``score`` is the
        fraction of pixels covered by the selected boxes (a float).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Morphological gradient highlights edges such as character strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, kernel)
    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Wide, 1-pixel-high closing joins horizontally adjacent components.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 1))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contour list is always the
    # second-to-last element, so index it instead of unpacking.
    contours = cv2.findContours(
        connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )[-2]

    mask = np.zeros(bw.shape, dtype=np.uint8)
    final_mask = np.zeros(gray.shape, dtype=np.uint8)

    # Size limits are loop-invariant, so compute them once.
    # NOTE(review): the width limit is derived from shape[0] (rows) and the
    # height limits from shape[1] (columns); kept as in the original --
    # confirm this axis choice is intended.
    min_w = gray.shape[0] / 70
    min_h = gray.shape[1] / 100
    max_h = gray.shape[1] / 15

    for idx, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)

        # Redraw only this contour inside its box to measure how much of
        # the bounding rectangle it fills.
        mask[y:y + h, x:x + w] = 0
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        fill_ratio = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)

        if fill_ratio > 0.3 and w > min_w and min_h < h < max_h:
            # NOTE(review): the slice stops at y+h-1 / x+w-1, so the last
            # row and column of the box are left out; kept as in the
            # original -- confirm intended.
            final_mask[y:(y + h - 1), x:(x + w - 1)] = 255

    # Foreground: thresholded pixels inside the selected boxes.
    fg = cv2.bitwise_and(thresh, thresh, mask=final_mask)

    # Background: white everywhere outside the selected boxes.
    inv = cv2.bitwise_not(final_mask)
    background = np.full(gray.shape, 255, dtype=np.uint8)
    bk = cv2.bitwise_or(background, background, mask=inv)

    # Combine foreground + background.
    res = cv2.bitwise_or(fg, bk)

    # Force float division so the ratio is not truncated under Python 2.
    covered = np.count_nonzero(final_mask == 255)
    score = float(covered) / (final_mask.shape[0] * final_mask.shape[1])
    return res, score
def image_to_string(image):
    """Run the regular preprocessing and return Tesseract's text output.

    Args:
        image: BGR image passed to ``remove_clear`` and ``detect_text``.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the
        preprocessed image.
    """
    binarized = remove_clear(image, 150)

    # Only the masked image is needed here; the coverage score is discarded.
    text_only, _ = detect_text(image, binarized)

    # NOTE(review): recent Tesseract releases document the engine-mode flag
    # as `--oem`; confirm the installed version accepts `-oem`.
    tesseract_config = "-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz:. -l fra -oem 1"

    # Recognize text with tesseract for python.
    return pytesseract.image_to_string(text_only, config=tesseract_config)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment