-
-
Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
# use this command to install open cv2 | |
# pip install opencv-python | |
# use this command to install PIL | |
# pip install Pillow | |
import cv2 | |
from PIL import Image | |
def mark_region(image_path):
    """Outline candidate text regions on a page image and collect their boxes.

    The image is read with OpenCV, converted to grayscale, blurred,
    adaptively thresholded (inverted) and dilated so that neighbouring
    marks merge into larger blobs. The bounding box of each external
    contour is then compared against fixed pixel limits; boxes that pass
    are drawn on the image and recorded.

    NOTE(review): the limits (600, 1000, 2400, 2000, area 10000) and the
    fixed right edge of 2200 are hard-coded pixel values, so they
    presumably fit one specific page layout/resolution -- confirm before
    using on other documents.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        the rectangles drawn on it, and a list with one
        ``[(x, y), (2200, y + h)]`` corner pair per drawn rectangle.

    Raises:
        OSError: If OpenCV could not read ``image_path``.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"could not read image: {image_path!r}")

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list is the first item of a 2-tuple result and the
    # second item of a 3-tuple result.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, w, h = cv2.boundingRect(c)

        # The two checks are independent, so a contour that satisfies both
        # is drawn and recorded twice.
        if y >= 600 and x <= 1000:
            if area > 10000:
                cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
                line_items_coordinates.append([(x, y), (2200, y + h)])

        if y >= 2400 and x <= 2000:
            cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return the loaded image itself so the result is defined even when no
    # contour passed the checks above.
    return im, line_items_coordinates
This code is very useful and provided by the author, @akash-ch2812, in the context of a terrific Medium article.
There are a few quirks though:
- `imagE_path` is misspelled in line 10; it should be `image_path`.
- The variable name `image` is used instead of `im` in the loop, so the returned image only has the final region. The variable name should be consistently `im` or `image` through all occurrences so that the returned image contains all the regions.
- This code hard-codes assumptions on the input image in lines 31-38 that will not work for generic image files.
Here is a version that addresses these problems:
# use this command to install open cv2
# pip install opencv-python
import cv2
def mark_region(image_path, threshold_region_ignore=40):
    """Draw a box around every sufficiently large contour region of an image.

    The image is read with OpenCV, converted to grayscale, blurred,
    adaptively thresholded (inverted) and dilated so that neighbouring
    marks merge. Each external contour's bounding box is drawn on the
    image and recorded unless it is narrower or shorter than the
    threshold.

    Args:
        image_path: Path of the image file to read.
        threshold_region_ignore: Minimum width and minimum height, in
            pixels, that a bounding box needs in order to be kept.
            Defaults to 40.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        the rectangles drawn on it, and a list with one
        ``[(x, y), (x + w, y + h)]`` corner pair per kept region.

    Raises:
        OSError: If OpenCV could not read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"could not read image: {image_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list is the first item of a 2-tuple result and the
    # second item of a 3-tuple result.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)

        # Skip boxes that are too small in either dimension.
        if w < threshold_region_ignore or h < threshold_region_ignore:
            continue

        image = cv2.rectangle(image, (x, y), (x + w, y + h), color=(255, 0, 255), thickness=3)
        line_items_coordinates.append([(x, y), (x + w, y + h)])

    return image, line_items_coordinates
And since this code does not come with a usage example, here is one:
# use this command to install open cv2
# pip install opencv-python
# use this command to install matplotlib
# pip install matplotlib
import cv2
import matplotlib.pyplot as plt
# [... define the mark_region method ...]
FILENAME = "path/to/some_image.jpg"
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV loads images in BGR channel order while matplotlib displays RGB,
# so convert before showing to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
Hope this answers your question @doughnet!
This code is very useful and provided by the author, @akash-ch2812, in the context of a terrific Medium article.
There are a few quirks though:
- `imagE_path` is misspelled in line 10; it should be `image_path`.
- The variable name `image` is used instead of `im` in the loop, so the returned image only has the final region. The variable name should be consistently `im` or `image` through all occurrences so that the returned image contains all the regions.
- This code hard-codes assumptions on the input image in lines 31-38 that will not work for generic image files.
Here is a version that addresses these problems:
# use this command to install open cv2 # pip install opencv-python import cv2 def mark_region(image_path): image = cv2.imread(image_path) # define threshold of regions to ignore THRESHOLD_REGION_IGNORE = 40 gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) blur = cv2.GaussianBlur(gray, (9,9), 0) thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,30) # Dilate to combine adjacent text contours kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9)) dilate = cv2.dilate(thresh, kernel, iterations=4) # Find contours, highlight text areas, and extract ROIs cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnts = cnts[0] if len(cnts) == 2 else cnts[1] line_items_coordinates = [] for c in cnts: area = cv2.contourArea(c) x, y, w, h = cv2.boundingRect(c) if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE: continue image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3) line_items_coordinates.append([(x,y), (x+w, y+h)]) return image, line_items_coordinatesAnd this code does not have usage:
# use this command to install open cv2 # pip install opencv-python # use this command to install matplotlib # pip install matplotlib import cv2 import matplotlib.pyplot as plt # [... define the mark_region method ...] FILENAME = "path/to/some_image.jpg" image, line_items_coordinates = mark_region(FILENAME) plt.figure(figsize=(20,20)) plt.imshow(image)Hope this answers your question @doughnet!
Thank you. I’ll give this another go. Of course a couple days ago my OCR development box got wiped so I’ll start from fresh and hopefully have more luck. Thank you again for the help.
Hope this answers your question @doughnet!
Tried it this evening but it still does nothing with the updated code you provided. I don't understand how running: python3 Marking_ROI.py gives it enough information to specify what folder or image to process. Is it looking for images in the same path or which path? What file names is it looking for? Are all the scripts supposed to be run from another script and not run individually? There is such a lack of information provided in the original Medium article ... doesn't help when there are variable errors like imagE_path ....
Would you mind giving step by step with proper details? Should be super simple to follow instructions when they are properly documented. Another error in the Medium article is the very first pdf2image script; nowhere does it mention to change the "provide path to pdf file" to your filename; so simple to just state in the Medium article to replace this and that to get it working instead of wasting user's time.
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call `python3 Marking_ROI.py` to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post, the `mark_region()` code can be used the following way:
# use this command to install open cv2
# pip install opencv-python
# use this command to install matplotlib
# pip install matplotlib
import cv2
import matplotlib.pyplot as plt
# [... define the mark_region method ...]
FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV loads images in BGR channel order while matplotlib displays RGB,
# so convert before showing to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png") # <--- added this to output an image
where obviously you have to edit `FILENAME` to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call
python3 Marking_ROI.py
to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post themark_regions()
code can be used the following way:# use this command to install open cv2 # pip install opencv-python # use this command to install matplotlib # pip install matplotlib import cv2 import matplotlib.pyplot as plt # [... define the mark_region method ...] FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want image, line_items_coordinates = mark_region(FILENAME) plt.figure(figsize=(20,20)) plt.imshow(image) plt.savefig("image-with-regions.png") # <--- added this to output an imagewhere obviously you have to edit
FILENAME
to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).
Of course, thank you for the time spent on “improving” and “correcting” the original author’s scripts. The way I work, and pretty much everyone in my field, is: if you’re going to do the work, do it well.
“Improved” details would be helpful to know what someone is getting themselves into. A simple one-liner stating to expect to modify the code because it is not complete would help users (or myself) know that it needs to be modified to work.
I’ll try this out again; see if it works.
EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.
EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.
You're welcome.
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just callpython3 Marking_ROI.py
to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post themark_regions()
code can be used the following way:# use this command to install open cv2 # pip install opencv-python # use this command to install matplotlib # pip install matplotlib import cv2 import matplotlib.pyplot as plt # [... define the mark_region method ...] FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want image, line_items_coordinates = mark_region(FILENAME) plt.figure(figsize=(20,20)) plt.imshow(image) plt.savefig("image-with-regions.png") # <--- added this to output an imagewhere obviously you have to edit
FILENAME
to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).Of course thank you for the time spent on “improving” and “correcting” the original author’s scripts. The way I work and pretty everyone in my field is if you’re going to do the work; do it well.
“improved” details would be helpful to know what someone is getting them self into. A simple one line stating to expect modifying the code because it is not complete would help users (or myself) know that it needs to be modified to work.
I’ll try this out again; see if it works.
EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.
Thank you for the constructive feedback. My article on Medium was written particularly for users exploring the document intelligence domain. As mentioned by @jlumbroso, it was just an introduction to a concept which I thought might be worth sharing with the community. I don't know about your ways of working, but for me it's all about getting the concept first and then jumping into the code.
The gists here on Github were specifically made as a supporting material for the original article. Again @jlumbroso thank you for helping out with the spell checks and typos.
Hope this answers your questions about the article intent and typos in code.
@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call
python3 Marking_ROI.py
to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post themark_regions()
code can be used the following way:# use this command to install open cv2 # pip install opencv-python # use this command to install matplotlib # pip install matplotlib import cv2 import matplotlib.pyplot as plt # [... define the mark_region method ...] FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want image, line_items_coordinates = mark_region(FILENAME) plt.figure(figsize=(20,20)) plt.imshow(image) plt.savefig("image-with-regions.png") # <--- added this to output an imagewhere obviously you have to edit
FILENAME
to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).
@jlumbroso - Thank you for the kind words 👍
Dear @akash-ch2812 or @jlumbroso,
I'm having some trouble using this code on some PDFs I'm working on, I thought either you might have some ideas on what's going wrong.
I have a single PDF called, "DNR_WFH.pdf". I was able to use the first section of code below to split the PDF into separate pages and name them accordingly.
from pdf2image import convert_from_path
# PDF to split into one JPEG per page
pdfs = r"C:\Users\mhiebing\Desktop\DNR_WFH.pdf"
# 350 is passed as the second positional argument of convert_from_path
pages = convert_from_path(pdfs, 350)
# Number the output files starting at 1: Page_1.jpg, Page_2.jpg, ...
for i, page in enumerate(pages, start=1):
    image_name = f"Page_{i}.jpg"
    page.save(image_name, "JPEG")
For the second section, I'm only looking at the first JPEG to keep things simple. When I call the mark_region(image_path)
function nothing happens. Is there supposed to be a window where I outline the boxes we want to extract text from? Here's what I have for the second section:
import cv2
import matplotlib.pyplot as plt
# NOTE(review): indentation is not visible in this paste. If the five driver
# lines below (FILENAME ... plt.savefig) are indented as part of the function
# body, nothing at module level ever calls mark_region, which would explain
# "nothing happens" -- confirm against the poster's actual file.
def mark_region(image_path):
#define the mark_region method
FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image
# Load the image from disk with OpenCV
image = cv2.imread(image_path)
# define threshold of regions to ignore
THRESHOLD_REGION_IGNORE = 40
# Grayscale -> blur -> inverted adaptive threshold
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,30)
# Dilate to combine adjacent text contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
dilate = cv2.dilate(thresh, kernel, iterations=4)
# Find contours, highlight text areas, and extract ROIs
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Pick the contour list out of a 2-item or 3-item findContours result
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
line_items_coordinates = []
for c in cnts:
# NOTE(review): `area` is computed but not used below
area = cv2.contourArea(c)
x, y, w, h = cv2.boundingRect(c)
# Skip bounding boxes narrower or shorter than the threshold
if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE:
continue
image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3)
line_items_coordinates.append([(x,y), (x+w, y+h)])
return image, line_items_coordinates
# NOTE(review): a bare name -- this references the function but does not call it
mark_region
Thank you for putting up the article and supplying the code!
@Matthew-Hiebing
I'm just trying to adapt the existing fantastic code and I found your question.
This code snippet of your code is inside the function mark_region but should be outside. This because you are calling the function mark_region(FILENAME)
and pass a FILENAME:
#define the mark_region method
FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.savefig("image-with-regions.png") # <--- added this to output an image
Thank you so much for your work.
There were problems with `opencv-python==4.5.5.62`. Use `opencv-python==4.1.2.30`.
if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE: continue image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3) line_items_coordinates.append([(x,y), (x+w, y+h)])
Thanks a lot man, Before I was only getting few area marked but after revamp and using your code it's marking every single area
Thank you once again. Keep up the good work
if we want to optimize this code:
import cv2
from PIL import Image
from pdf2image import convert_from_path
def mark_region(im):
    """Outline candidate text regions on an already loaded page image.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated so that neighbouring marks merge. The bounding
    box of each external contour is compared against fixed pixel limits;
    boxes that pass are drawn on ``im`` and recorded.

    NOTE(review): the limits (600, 1000, 2400, 2000, area 10000) and the
    fixed right edge of 2200 are hard-coded pixel values, so they
    presumably fit one specific page layout/resolution -- confirm before
    using on other documents.

    Args:
        im: Image array as returned by ``cv2.imread``; rectangles are
            drawn directly onto it.

    Returns:
        A tuple ``(image, line_items_coordinates)``: ``im`` with the
        rectangles drawn on it, and a list with one
        ``[(x, y), (2200, y + h)]`` corner pair per drawn rectangle.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list is the first item of a 2-tuple result and the
    # second item of a 3-tuple result; a plain [0] is only right for the
    # former.
    cnts = found[0] if len(found) == 2 else found[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, w, h = cv2.boundingRect(c)

        # Either rule selects the region; each region is recorded once.
        large_upper_left = y >= 600 and x <= 1000 and area > 10000
        lower_band = y >= 2400 and x <= 2000
        if large_upper_left or lower_band:
            cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return the input image itself so the result is defined even when no
    # contour matched.
    return im, line_items_coordinates
# Placeholder: replace with the folder that holds the poppler binaries.
# (No space is allowed between the raw-string prefix and the quote.)
poppler_path = r'you should write poppler bin folder path '
# Placeholder path: replace with the PDF to convert. 480 is passed as the
# second positional argument of convert_from_path.
pages = convert_from_path("you should write here pdf path", 480, poppler_path=poppler_path)
for i, page in enumerate(pages):
    # Save each page as a JPEG, then reload it with OpenCV for marking.
    image_name = f"Deneme_{i}.JPEG"
    page.save(image_name, "JPEG")
    im = cv2.imread(image_name)
    marked_image, coordinates = mark_region(im)
    # Write the annotated page next to the original, prefixed "Marked_".
    cv2.imwrite(f"Marked_{image_name}", marked_image)
where in this code is it specifying the image path folder?