Last active
April 3, 2024 11:33
-
-
Save vinodjayachandran/0b9b325ca27854a2326cf0d438dcd088 to your computer and use it in GitHub Desktop.
From a folder containing invoice/receipt images, extract information using OpenAI (gpt-4-vision-preview)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# File showing usage of openaiUtil.py | |
import openaiUtil | |
# Script entry point: the body below runs only when this file is executed directly.
if __name__ == '__main__':
    print(" Welcome")
    # Build the request from the images in the folder, send it, then show the reply.
    request_payload = openaiUtil.generate_payload("image_folder_path")
    extracted_text = openaiUtil.question_image(request_payload)
    print(extracted_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
import os | |
import requests | |
from openai import OpenAI | |
# The OpenAI API key comes from the OPENAI_API_KEY environment variable
# (None when the variable is not set).
api_key = os.getenv("OPENAI_API_KEY")
# SDK client constructed with that same key.
client = OpenAI(api_key=api_key)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
# HTTP headers for the chat-completions request: a JSON content type and the
# API key presented as a bearer token.
headers = {}
headers["Content-Type"] = "application/json"
headers["Authorization"] = f"Bearer {api_key}"
content: list[dict[str, str | dict[str, str]]] = [ | |
{ | |
"type": "text", | |
"text": "Extract the data from all the invoice images. Provide the Invoice date in YYYY/MM/DD, Currency ISO code, Invoice total " | |
"and Invoices with the structure {“Item”: String, “Unit Price”:String, " | |
"“Quantity”:String, “Amount”:String}" | |
} | |
] | |
def generate_image_payload(base64_image, mime_type="image/jpeg"):
    """Build an ``image_url`` message part that embeds an image as a data URL.

    Args:
        base64_image: Base64-encoded image data, as text.
        mime_type: Media type written into the data URL. Defaults to
            ``"image/jpeg"`` (the value that used to be hard-coded) so
            existing callers get the same result; pass e.g. ``"image/png"``
            when the encoded file is not a JPEG.

    Returns:
        A dict with ``"type": "image_url"`` and an ``"image_url"`` dict whose
        ``"url"`` is ``data:<mime_type>;base64,<base64_image>``.
    """
    return {
        "type": "image_url",
        "image_url": {
            "url": f"data:{mime_type};base64,{base64_image}",
        },
    }
def generate_payload(image_folder_path, model="gpt-4-vision-preview"):
    """Build the chat-completions request body for every file in a folder.

    The user message starts with the parts in the module-level ``content``
    list and gains one ``image_url`` part per regular file found directly
    inside ``image_folder_path``.

    Args:
        image_folder_path: Directory whose files are encoded and attached.
        model: Model name placed in the payload. Defaults to the previously
            hard-coded ``"gpt-4-vision-preview"``.

    Returns:
        A dict with ``"model"`` and ``"messages"`` (system message followed
        by the user message).

    Raises:
        OSError: If the folder cannot be listed or a file cannot be read.
    """
    print(f" Content {content}")
    # Work on a copy: appending to the shared module-level list would make
    # every later call resend the images attached by earlier calls.
    user_content = list(content)
    # os.listdir returns entries in arbitrary order; sort so the image order
    # in the request is reproducible.
    for filename in sorted(os.listdir(image_folder_path)):
        image_path = os.path.join(image_folder_path, filename)
        # Sub-directories cannot be opened as image files, so skip them.
        if not os.path.isfile(image_path):
            continue
        user_content.append(generate_image_payload(encode_image(image_path)))
    system_message = {
        "role": "system",
        "content": "You are an AI assistant that extracts data from images and returns them as "
                   "structured JSON objects."
    }
    user_message = {"role": "user", "content": user_content}
    return {"model": model, "messages": [system_message, user_message]}
def question_image(payload, timeout=120):
    """POST ``payload`` to the chat-completions endpoint and return the reply text.

    Args:
        payload: JSON-serialisable request body (for example the dict
            returned by ``generate_payload``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface the HTTP status of a failed request instead of letting it show
    # up later as a KeyError on 'choices' when the body is an error object.
    response.raise_for_status()
    body = response.json()
    return body['choices'][0]['message']['content']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment