Created
June 18, 2016 06:31
-
-
Save bcr/8d871cae54c1cdb0dd351cb1cb504e82 to your computer and use it in GitHub Desktop.
OpenCV fake OCR example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_image_display/py_image_display.html#display-image
# http://docs.opencv.org/3.0-beta/modules/imgproc/doc/miscellaneous_transformations.html
# http://docs.opencv.org/2.4/doc/tutorials/imgproc/threshold/threshold.html
from functools import cmp_to_key

import cv2
# Original colour image, kept so it can be annotated at the end.
img = cv2.imread('CSH-drink-4k.jpg')
# The same file decoded directly as a single-channel grayscale image.
gray = cv2.imread('CSH-drink-4k.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread() reports failure by returning None instead of raising, so check
# here rather than fail later with an obscure error inside cv2.threshold().
if img is None or gray is None:
    raise IOError("could not read 'CSH-drink-4k.jpg'")

# Inverted binary threshold: pixels brighter than 243 become 0, everything
# else becomes 255. Save a copy to admire.
thresh = cv2.threshold(gray, 243, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imwrite('CSH-drink-4k-thresh.png', thresh)

# The ultimate strategy is to identify all of the contours and find a bounding
# box for them. Then we'll dump all of the ones that don't fit ones and zeroes
# and then identify the ones and zeroes based on the size of the box.

# Find all of the outermost contours in the image. findContours() returns
# (image, contours, hierarchy) in OpenCV 3.x but (contours, hierarchy) in 2.4
# and 4.x; taking the last two items works with every version. A copy of the
# thresholded image is passed so the saved array is never modified.
contours, hierarchy = cv2.findContours(
    thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)[-2:]
def voverlapping(rectangle1, rectangle2):
    """Return True if two bounding rectangles overlap vertically.

    Each rectangle is an ``[x, y, w, h]`` sequence, so index 1 is the top
    edge and index 3 is the height. Two rectangles whose vertical extents
    intersect (edges that merely touch count as intersecting) are treated
    as sitting on the same line.
    """
    top1 = rectangle1[1]
    bottom1 = rectangle1[1] + rectangle1[3]
    top2 = rectangle2[1]
    bottom2 = rectangle2[1] + rectangle2[3]
    # The only way not to overlap is for one rectangle to start strictly
    # below the other's bottom edge.
    return not (top1 > bottom2 or bottom1 < top2)
# Each bounding rectangle is [x,y,w,h]
#
# We first map from contours to bounding rectangles, then we filter that based
# on rectangle size criteria, then we sort that based on x, y position left to
# right, top to bottom. The final pile of rectangles should all be ones and
# zeroes, identifiable by size.
def _reading_order(rectangle1, rectangle2):
    """cmp-style comparator that puts rectangles into reading order.

    Rectangles on the same line (vertically overlapping) are ordered by x;
    otherwise they are ordered by y. Returns a negative, zero or positive
    number in the manner of a classic cmp function.
    """
    if voverlapping(rectangle1, rectangle2):
        return rectangle1[0] - rectangle2[0]
    return rectangle1[1] - rectangle2[1]


# Passing a comparison function positionally to sorted() only works on
# Python 2; wrapping it with cmp_to_key() works on Python 2.7 and 3 alike.
boundingRectangles = sorted(
    (
        rectangle
        for rectangle in map(cv2.boundingRect, contours)
        # Keep only boxes with width strictly between 20 and 40 and height
        # strictly between 40 and 60.
        if 20 < rectangle[2] < 40 and 40 < rectangle[3] < 60
    ),
    key=cmp_to_key(_reading_order),
)

# Ones are smaller than zeroes, so make a list of (bit, rectangle) pairs using
# the width for identification. This must be a real list, not a lazy map
# object, because it is walked twice: once to build the string below and once
# more when the image is annotated.
bitsAndRectangles = [
    (1 if rectangle[2] < 30 else 0, rectangle)
    for rectangle in boundingRectangles
]

# Convert all the bits to a giant string and print it
bitsString = "".join(str(bit) for bit, _ in bitsAndRectangles)
print(bitsString)
# Take the original color image and annotate it with the bounding box for each
# bit along with our interpretation of the bit value and a circle in the corner
# for bit zero in an eight bit byte.
fontFace = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
fontThickness = 2
# Each entry is a (bit, [x, y, w, h]) pair; enumerate() supplies the running
# bit index that is used to spot the first bit of every group of eight.
for bitcount, (bit, (x, y, w, h)) in enumerate(bitsAndRectangles):
    # Draw the bounding box as a 1-pixel black outline
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), 1)
    # Draw our interpretation of the bit value in the lower left
    cv2.putText(img, str(bit), (x + 1, y + h - 4), fontFace, fontScale,
                (0, 0, 0), fontThickness, cv2.LINE_AA)
    # If we are drawing bit zero then draw a filled circle to indicate it
    # (a thickness of -1 asks OpenCV for a filled shape)
    if bitcount % 8 == 0:
        cv2.circle(img, (x + 4 + 2, y + 4 + 2), 4, (0, 0, 0), -1)

# Put the image with annotations
cv2.imwrite('CSH-drink-4k-annotated.png', img)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment