-
-
Save hamdshah/a94df20fe56c0029e6057cdab255337a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# OCR an image with Tesseract and store the recognized text in the file's
# Finder comment (com.apple.metadata:kMDItemFinderComment), then ask Spotlight
# to re-import the file so the text becomes searchable.
#
# Usage:    <this-script> IMAGE
# Requires: tesseract, xml (XMLStarlet), plutil, xxd, xattr, mdimport
#
# -e: stop on the first failing command; -u: unset variables are errors;
# pipefail: a failing tesseract/plutil stage is not hidden by the last stage
# of its pipeline, so a bogus comment is never written.
set -euo pipefail

if [[ $# -lt 1 || ! -f "$1" ]]; then
  printf 'Usage: %s IMAGE\n' "${0##*/}" >&2
  exit 2
fi

# Recognize English text and XML-escape it so it can be embedded in the
# plist <string> element below.
CONTENTS=$(
  tesseract -c language_model_penalty_non_dict_word=0.8 \
    --tessdata-dir /usr/local/share/ "$1" stdout -l eng \
    | xml esc
)

# Build an XML plist holding the text, convert it to a binary plist, and
# hex-encode it on a single line, which is the form `xattr -w -x` expects.
# printf is used instead of `$((cat <<EOF ...` because a leading `$((` is
# parsed as arithmetic expansion first and is ambiguous.
hex=$(
  printf '%s\n' \
    '<?xml version="1.0" encoding="UTF-8"?>' \
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">' \
    '<plist version="1.0">' \
    "<string>${CONTENTS}</string>" \
    '</plist>' \
    | plutil -convert binary1 - -o - \
    | xxd -p \
    | tr -d '\n'
)

# Write the hex-encoded binary plist as the Finder comment attribute.
xattr -w -x com.apple.metadata:kMDItemFinderComment "$hex" "$1"

# Have Spotlight re-import the file so the new comment is indexed.
mdimport "$1"
# Alternative: preprocess the image with ImageMagick before running OCR.
# Screenshots of individual windows have an alpha channel that prevents Tesseract from scanning them properly, and a lot of UI text is too small to scan accurately. To work around both problems, replace the CONTENTS assignment above with:
#CONTENTS=$(convert "$1" -magnify -alpha remove - | tesseract -c language_model_penalty_non_dict_word=0.8 --tessdata-dir /usr/local/share/ stdin stdout -l eng | xml esc)
# Testing with a screenshot of my Terminal, I got better results with -magnify than with -adaptive-resize '200%x200%', but YMMV.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment