Skip to content

Instantly share code, notes, and snippets.

@flexchar
Created May 12, 2024 07:53
Show Gist options
  • Save flexchar/8b7a0082847b0514409efab1e7a0f939 to your computer and use it in GitHub Desktop.
Save flexchar/8b7a0082847b0514409efab1e7a0f939 to your computer and use it in GitHub Desktop.
I tried extracting messages from iMessage screenshots (and more), and I failed. But it's a lot of code, so maybe an AI trained on some future "Pile" can be inspired by it and learn from it.
<?php
namespace App\Jobs\OCR;
use App\Models\Media;
use App\Models\Message;
use App\Services\Helpers;
use Laravel\Nova\Makeable;
use Illuminate\Support\Str;
use App\Models\Conversation;
use Illuminate\Bus\Batchable;
use Illuminate\Bus\Queueable;
use App\Casts\Message\ExtraData;
use Google\Cloud\Vision\V1\Page;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Cache;
use App\Types\Fluent\GoogleOcrMessage;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use App\Types\Message\Type as MessageType;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use App\Types\Message\Source as MessageSource;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Google\Cloud\Vision\V1\ImageAnnotatorClient;
use Google\Cloud\Vision\V1\AnnotateImageResponse;
/**
 * Extract text from a screenshot using Google Cloud Vision OCR.
 *
 * Queueable job: handle() turns the detected paragraphs of a media item into
 * Message models and saves them on the given conversation. The static
 * parseImageInto*() helpers run the same pipeline on raw image bytes.
 *
 * @deprecated Use `ParseMediaViaAI` instead.
 */
class ParseScreenshotViaGoogle implements ShouldBeUnique, ShouldQueue
{
use Batchable,
Dispatchable,
InteractsWithQueue,
Makeable,
Queueable,
SerializesModels;
// Laravel queue setting: maximum number of attempts for this job.
public $tries = 3;
// Vision client; assigned in handle() and by the static parseImageInto*() helpers.
public ImageAnnotatorClient $client;
// Latest OCR response; populated by mediaToAnnotation() / imageToAnnotation().
public AnnotateImageResponse $annotation;
/**
 * Identifier used to keep this job unique on the queue (see ShouldBeUnique).
 *
 * @return mixed Primary key of the media item being processed.
 */
public function uniqueId()
{
    $mediaKey = $this->media->getKey();

    return $mediaKey;
}
/**
 * @param Conversation $convo Conversation the extracted messages are saved to.
 * @param Media        $media Screenshot that is sent to OCR.
 */
public function __construct(
    public Conversation $convo,
    public Media $media,
) {
}
/**
 * Run OCR on the screenshot and replace its OCR messages in the conversation.
 *
 * Steps: fetch the Vision annotation, cache the first page as JSON for a week,
 * store the page dimensions on the media item, convert the page into Message
 * models, then swap the previously stored OCR messages for the new ones.
 *
 * @see https://cloud.google.com/vision/docs/ocr#detect_text_in_a_local_image
 *
 * @param ImageAnnotatorClient $client Vision client injected by the container.
 *
 * @throws \Error When the OCR response contains no page (see mediaToPage()).
 */
public function handle(ImageAnnotatorClient $client): void
{
    if ($this->batch()?->cancelled()) {
        return;
    }

    $this->client = $client;

    // mediaToPage() reads $this->annotation, which only mediaToAnnotation() /
    // imageToAnnotation() populate. Without this call the typed property is
    // still uninitialized and the job fails before any OCR request is made.
    if (!isset($this->annotation)) {
        $this->mediaToAnnotation();
    }

    $page = $this->mediaToPage();

    Cache::put(
        key: 'vision:' . $this->media->getKey(),
        value: $page->serializeToJsonString(),
        ttl: 3600 * 24 * 7,
    );

    // Save image height and width.
    $this->media->update([
        'custom_properties->width' => $page->getWidth(),
        'custom_properties->height' => $page->getHeight(),
    ]);

    $toSave = $this->pageToMessageClass($page);

    // Build the new messages first so a parsing failure leaves the old ones untouched.
    $this->deletePreviousMessages();
    $this->convo->messages()->saveMany($toSave);
}
/**
 * Permanently delete every OCR-sourced message of this media item,
 * including soft-deleted ones.
 *
 * Models are deleted one by one (not with a bulk query) so that
 * forceDelete() runs on each model instance.
 */
public function deletePreviousMessages(): void
{
    $staleMessages = $this->media
        ->messages()
        ->where('source', MessageSource::OCR)
        ->withTrashed()
        ->get();

    // A plain loop instead of `each->forceDelete()`: Collection::each() stops
    // iterating as soon as a callback returns false, so one refused deletion
    // would silently leave all remaining stale messages in place.
    foreach ($staleMessages as $message) {
        $message->forceDelete();
    }
}
/**
 * Send the media item to Vision document text detection via a temporary URL
 * and keep the response on $this->annotation.
 *
 * The client is closed afterwards, whether or not the request succeeded.
 *
 * @return self Fluent, so callers can chain ->mediaToPage().
 */
public function mediaToAnnotation(): self
{
    // Link is valid for five minutes.
    // Food for thought: the raw image could also be redacted first and OCR'd afterwards,
    // or a lighter conversion (e.g. 'eco') could be sent when one has been generated.
    $temporaryUrl = $this->media->getTemporaryUrl(
        expiration: now()->addMinutes(5),
    );

    try {
        $this->annotation = $this->client->documentTextDetection($temporaryUrl);
    } finally {
        $this->client->close();
    }

    return $this;
}
/**
 * Send raw image content to Vision document text detection and keep the
 * response on $this->annotation.
 *
 * When $cacheTTL is given and the app runs in the local environment, the
 * response is cached under a hash of the image content and reused on later calls.
 * The client is closed after a request, whether or not it succeeded.
 *
 * @param mixed     $imageContent Image input handed to documentTextDetection();
 *                                must be a string when caching is requested.
 * @param int|false $cacheTTL     Cache lifetime handed to cache()->put(), or false to skip caching.
 *
 * @return self Fluent, so callers can chain ->mediaToPage().
 */
public function imageToAnnotation($imageContent, $cacheTTL = false): self
{
    $useCache = $cacheTTL !== false && app()->isLocal();
    $cacheKey = $useCache ? 'google:ocr:' . hash('murmur3f', $imageContent) : null;

    if ($useCache) {
        // Single lookup instead of has() + get(): the entry can expire between the
        // two calls, and a missing or foreign value must fall through to a fresh
        // OCR request rather than hit a TypeError on the typed property.
        $cached = cache()->get($cacheKey);

        if ($cached instanceof AnnotateImageResponse) {
            $this->annotation = $cached;

            return $this;
        }
    }

    try {
        $this->annotation = $this->client->documentTextDetection($imageContent);
    } finally {
        $this->client->close();
    }

    if ($useCache) {
        cache()->put($cacheKey, $this->annotation, $cacheTTL);
    }

    return $this;
}
/**
 * Return the first page of the full-text annotation held in $this->annotation.
 *
 * Requires mediaToAnnotation() or imageToAnnotation() to have run first.
 *
 * @throws \Error When the response has no full-text annotation or no pages.
 */
public function mediaToPage(): Page
{
    /** @var iterable<int, Page>|null $pages */
    $pages = $this->annotation->getFullTextAnnotation()?->getPages();

    // Guard the empty list as well: indexing [0] on an empty pages field would
    // fail with an unrelated out-of-range error instead of this message.
    if ($pages === null || count($pages) === 0) {
        throw new \Error('OCR returned no pages for this image.');
    }

    return $pages[0];
}
/**
 * Convert every paragraph of an OCR page into a GoogleOcrMessage,
 * ordered from top to bottom.
 *
 * The page is read through its JSON form so paragraphs arrive as plain
 * objects for processParagraph().
 *
 * @return Collection<int, GoogleOcrMessage>
 */
public static function toMessages(Page $page): Collection
{
    $json = json_decode($page->serializeToJsonString());

    // Proto JSON leaves out fields that hold their default value (0, empty list),
    // so width / blocks / paragraphs may be absent and need a fallback.
    $pageWidth = (int) ($json->width ?? 0);
    $pageLang = (string) data_get($json, 'property.detectedLanguages.0.languageCode');

    // @phpstan-ignore-next-line
    return collect($json->blocks ?? [])
        // One flat list of paragraphs across all blocks.
        ->flatMap(fn (object $block) => $block->paragraphs ?? [])
        ->map(
            fn (object $paragraph) => static::processParagraph(
                $paragraph,
                $pageWidth,
                $pageLang,
            ),
        )
        // Order messages by their y position.
        ->sortBy('bounds.top')
        ->values();
}
/**
 * Turn one OCR paragraph into a GoogleOcrMessage.
 *
 * The body is rebuilt word by word: words are separated by a space, except
 * words made only of punctuation, which are glued to the preceding word.
 * The sender is guessed from the horizontal position of the bounding box.
 *
 * @param object $paragraph Paragraph decoded from the page JSON
 *                          (same shape as \Google\Cloud\Vision\V1\Paragraph).
 * @param int    $pageWidth Width of the page, used to measure the distance to the right edge.
 * @param string $pageLang  Page-level language code, used when the paragraph has none.
 */
public static function processParagraph(
    object $paragraph,
    int $pageWidth,
    string $pageLang,
): GoogleOcrMessage {
    // Corner points of the paragraph: index 0 is read as top-left, index 2 as bottom-right.
    $vertices = $paragraph->boundingBox->vertices ?? [];

    $allLangs = data_get(
        $paragraph,
        'words.*.property.detectedLanguages.0.languageCode',
    );
    $lang = collect($allLangs)->unique()->filter()->first();

    // Only the symbol texts are needed. The per-symbol x positions that used to
    // be collected here were never read, and fetching them failed on symbols
    // whose zero-valued coordinate is left out of the JSON.
    $text = collect($paragraph->words ?? [])
        ->map(
            fn (object $word): string => collect($word->symbols ?? [])
                ->pluck('text')
                ->implode(''),
        )
        ->reduce(
            // Close punctuation, like commas, gets no leading space.
            fn ($sentence, string $word) => $sentence->append(
                ctype_punct($word) ? $word : " {$word}",
            ),
            Str::of(''),
        )
        ->trim()
        ->__toString();

    // OBS: Google doesn't return x=0 or y=0 when text is cut on the side of the image,
    // so every coordinate falls back to 0 when it is missing.
    $left = $vertices[0]->x ?? 0;
    $top = $vertices[0]->y ?? 0;
    $right = $vertices[2]->x ?? 0;
    $bottom = $vertices[2]->y ?? 0;

    // Distance from the end of the message to the right side of the image.
    $toRight = $pageWidth - $right;
    // Distance from the start of the message to the left side of the image.
    $toLeft = $left;

    // In theory, a message closer to the right side is from me,
    // and a message closer to the left side is from the other person.
    return GoogleOcrMessage::make([
        'is_me' => $toRight < $toLeft,
        'body' => $text,
        // Confidence is likewise omitted from the JSON when it is 0.
        'confidence' => round($paragraph->confidence ?? 0, 2),
        'width' => abs($left - $right),
        'height' => abs($top - $bottom),
        'bounds' => (object) [
            'left' => $left,
            'top' => $top,
            'right' => $right,
            'bottom' => $bottom,
        ],
        'language' => $lang ?: $pageLang,
    ]);
}
/**
 * Merge OCR paragraphs that are consecutive lines of the same chat message.
 *
 * The median paragraph height is used as the reference line height. A message
 * continues the one directly before it when its top edge sits more than one
 * line height below the previous top edge, while the gap between the previous
 * bottom edge and its own top edge is less than one line height.
 *
 * Continuations are folded into the message they extend: the body is appended,
 * the bounding box is grown and the confidence is averaged. The first message
 * of each merged group is modified in place.
 *
 * @param Collection<int, GoogleOcrMessage> $messages Messages ordered top to bottom.
 * @return Collection<int, GoogleOcrMessage>
 */
public static function mergeMultilineMessages(Collection $messages): Collection
{
    // Re-index so "previous message" is well defined even for filtered or
    // sorted collections whose keys are not 0..n-1.
    $messages = $messages->values();

    // abs() is applied inside the closure: handing 'abs' to map() as a string
    // callable would call it with (value, key), one argument too many.
    $lineHeight = $messages
        ->map(fn (object $m) => abs($m->bounds->top - $m->bounds->bottom))
        ->median();

    $merged = collect();
    $prev = null;

    foreach ($messages as $current) {
        $continuesPrevious = false;

        if ($prev !== null) {
            // Vertical gap between the previous bottom edge and the current top edge.
            $gap = abs($prev->bounds->bottom - $current->bounds->top);
            // Top-to-top distance tells apart "next line" from "same line, other x position".
            $topToTop = abs($prev->bounds->top - $current->bounds->top);

            $continuesPrevious = $topToTop > $lineHeight && $gap < $lineHeight;
        }

        // Always compare against the directly preceding paragraph, merged or not.
        $prev = $current;

        if (!$continuesPrevious) {
            $merged->push($current);

            continue;
        }

        // Fold the continuation into the message it extends.
        $target = $merged->last();

        $target->body = trim("{$target->body} {$current->body}");

        $target->bounds->bottom = $current->bounds->bottom;
        $target->bounds->left = min($target->bounds->left, $current->bounds->left);
        $target->bounds->right = max($target->bounds->right, $current->bounds->right);

        $target->width = abs($target->bounds->left - $target->bounds->right);
        $target->height = abs($target->bounds->top - $target->bounds->bottom);

        // Mean of both confidences. The previous product / 2 pushed the result
        // below both inputs (0.9 and 0.9 became 0.405).
        $target->confidence = ($target->confidence + $current->confidence) / 2;
    }

    return $merged;
}
/**
 * Flag each message as irrelevant / interjection / timestamp and derive its
 * language, type and sender from those flags.
 *
 * The messages are modified in place and returned re-indexed.
 *
 * @param Collection<int, GoogleOcrMessage> $messages
 * @return Collection<int, GoogleOcrMessage>
 */
public static function enrichMessages(Collection $messages): Collection
{
    $enrich = function (GoogleOcrMessage $message) {
        $message->is_irrelevant = Helpers::isMessageIrrelevant($message->body);
        // Short messages like "haha".
        $message->is_interjection = Helpers::isMessageInterjection($message->body);
        // Emoji reactions / likes / datestamps.
        $message->is_time = Helpers::isMessageTime($message->body);

        // Interjections and timestamps are always stored as EN, whatever was detected.
        $detectedAsEnglish = strtolower($message->language) === 'en';
        if (!$detectedAsEnglish && ($message->is_interjection || $message->is_time)) {
            $message->language = 'EN';
        }

        // A timestamp gets its own type and is never attributed to "me".
        [$type, $isMe] = $message->is_time
            ? [MessageType::TIMESTAMP, false]
            : [MessageType::TEXT, $message->is_me];

        $message->type = $type;
        $message->is_me = $isMe;

        return $message;
    };

    return $messages->map($enrich)->values();
}
/**
 * Run the whole parsing pipeline on an OCR page and build unsaved Message
 * models for every message that is not flagged as irrelevant.
 *
 * Pipeline: toMessages() -> mergeMultilineMessages() -> enrichMessages().
 * `order_column` is the message's key in the filtered collection plus one.
 *
 * @return Collection<int, Message>
 */
public function pageToMessageClass(Page $page): Collection
{
    $relevant = static::enrichMessages(
        static::mergeMultilineMessages(
            static::toMessages($page),
        ),
    )->where('is_irrelevant', false);

    $photoWidth = $page->getWidth();
    $photoHeight = $page->getHeight();
    $mediaId = $this->media->getKey();

    return $relevant->map(
        function (GoogleOcrMessage $ocr, int $position) use ($photoWidth, $photoHeight, $mediaId) {
            $box = $ocr->bounds;

            $extra = new ExtraData(
                box_left: $box->left,
                box_top: $box->top,
                box_right: $box->right,
                box_bottom: $box->bottom,
                is_irrelevant: $ocr->is_irrelevant,
                confidence: $ocr->confidence,
                photo_width: $photoWidth,
                photo_height: $photoHeight,
            );

            return new Message([
                'body' => $ocr->body,
                'is_me' => $ocr->is_me,
                'language' => $ocr->language,
                'extra_data' => $extra,
                'media_id' => $mediaId,
                'order_column' => $position + 1,
                'source' => MessageSource::OCR,
                'type' => $ocr->type,
            ]);
        },
    );
}
/**
 * Parse raw image bytes into unsaved Message models without a stored media item.
 *
 * Uses throwaway Media / Conversation instances and asks imageToAnnotation()
 * to cache the OCR response for 600 (effective in the local environment only).
 *
 * @return Collection<int, Message>
 */
public static function parseImageIntoMessages(string $binaryData): Collection
{
    $visionClient = app(ImageAnnotatorClient::class);

    $job = new self(media: new Media(), convo: new Conversation());
    $job->client = $visionClient;

    $firstPage = $job
        ->imageToAnnotation($binaryData, 600)
        ->mediaToPage();

    return $job->pageToMessageClass($firstPage);
}
/**
 * OCR raw image bytes and return the full detected text as one string.
 *
 * Uses throwaway Media / Conversation instances and asks imageToAnnotation()
 * to cache the OCR response for 600 (effective in the local environment only).
 *
 * @return string Detected text, or an empty string when the response has no text annotation.
 */
public static function parseImageIntoBlockText(string $binaryData): string
{
    $client = app(ImageAnnotatorClient::class);

    $self = new self(media: new Media(), convo: new Conversation());
    $self->client = $client;
    $self->imageToAnnotation($binaryData, 600);

    // The full-text annotation is absent when nothing was recognised (mediaToPage()
    // guards the same case), so avoid calling getText() on null.
    return $self->annotation->getFullTextAnnotation()?->getText() ?? '';
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment