Skip to content

Instantly share code, notes, and snippets.

@mkorkmaz
Created January 26, 2019 15:34
Show Gist options
  • Save mkorkmaz/3b19d706729e00b0e7e40d81ee67987b to your computer and use it in GitHub Desktop.
Save mkorkmaz/3b19d706729e00b0e7e40d81ee67987b to your computer and use it in GitHub Desktop.
Solves simple captchas that contain only digits
import cv2
import pytesseract
from os import walk
# Point pytesseract at the Tesseract executable explicitly instead of relying on PATH.
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract'
# Extra command-line options passed to Tesseract on every OCR call:
#   -c tessedit_char_whitelist=0123456789  restrict recognized characters to digits
#   --oem 3                                OCR engine mode 3
#   --psm 7                                page segmentation mode 7
#   -l digits                              language/model name "digits"
# NOTE(review): per the Tesseract manual, oem 3 is the default engine selection and
# psm 7 treats the image as a single text line; "-l digits" presumably requires a
# `digits` traineddata file to be installed -- confirm on the target machine.
config = u" -c tessedit_char_whitelist=0123456789 --oem 3 --psm 7 -l digits"
def solve(file):
    """OCR one digit-only captcha from ``./data`` and print a colored report.

    The image is inverse-thresholded, written to a scratch JPEG and handed to
    Tesseract. The recognized text is compared with the expected answer, which
    is the file name without its extension (``1234.jpg`` -> ``1234``).

    Args:
        file: Name of an image file located directly inside ``./data``.

    Returns:
        None. The outcome is printed to stdout: green when the OCR result
        equals the expected answer, red when it differs or when the file
        cannot be read as an image.
    """
    # Local import: the file only imports `walk` from `os` at module level.
    from os.path import splitext

    image_file_name = './data/' + file
    im = cv2.imread(image_file_name)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising;
        # skip stray non-image files (e.g. .DS_Store) instead of crashing the
        # whole batch inside cv2.threshold.
        print('\33[1;31m')
        print('-----------------------------------')
        print('SKIP\t:' + file + ' (not a readable image)')
        print('\33[m')
        return

    # Inverse binary threshold: pixels brighter than 150 become 0, the rest 255.
    _, thresh = cv2.threshold(im, 150, 255, cv2.THRESH_BINARY_INV)
    # NOTE: fixed scratch path, so concurrent runs would overwrite each other.
    cv2.imwrite('/tmp/temp.jpg', thresh)

    raw = pytesseract.image_to_string('/tmp/temp.jpg', config=config)
    # Drop every whitespace character (spaces, newlines, a trailing form feed),
    # not just spaces, so stray terminators cannot break the comparison.
    # '/' and '?' are mapped to '7' -- presumably common misreads of that digit.
    ocr = ''.join(raw.split()).replace('/', '7').replace('?', '7')

    # The expected answer is the file name minus its extension, whatever it is.
    expected = splitext(file)[0]

    # Dim green on a match, bold red on a mismatch; reset attributes afterwards.
    print('\33[2;32m' if ocr == expected else '\33[1;31m')
    print('-----------------------------------')
    print('OCR\t:' + ocr)
    print('EXPT\t:' + expected)
    print('\33[m')
# Gather the names of the non-directory entries directly inside ./data.
# Only the first triple produced by walk() is consumed, so sub-directories
# are never descended into; a missing directory simply yields no files.
files = []
top_level = next(walk('./data'), None)
if top_level is not None:
    files.extend(top_level[2])

# Run the solver on every captcha found, in the order walk() listed them.
for captcha_name in files:
    solve(captcha_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment