Last active
February 23, 2020 10:28
-
-
Save avalonv/72a298d75d0c360ec8e7a5a173bff82d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# tesseract-dl.sh - download an image and run Tesseract OCR on it.
#
# usage: tesseract-dl.sh {copy/edit} {lang} https://example.com/image.png
#   copy  hand the recognized text to xclip (replace with your clipboard
#         manager of choice)
#   edit  open the recognized text in $VISUAL (falls back to $EDITOR, then vi)
#   lang  three letter code: fra/spa/jpn etc, with eng as default
# with no options specified it prints to STDOUT

# Paths removed by cleanup() when the shell exits.
cleanup_paths=()

# Remove every temporary path registered in cleanup_paths.
cleanup() {
  if (( ${#cleanup_paths[@]} > 0 )); then
    rm -rf -- "${cleanup_paths[@]}"
  fi
}

# Print a one-line usage summary on stderr.
usage() {
  printf 'usage: %s {copy/edit} {lang} https://example.com/image.png\n' \
    "${0##*/}" >&2
}

#######################################
# Locate the OCR executable; the binary name varies between distributions.
# An alias cannot be used for this: bash does not expand aliases in
# non-interactive scripts, so the name is resolved explicitly instead.
# Outputs:   the command name to invoke, on stdout
# Returns:   0 if a candidate was found, 1 otherwise
#######################################
find_tesseract() {
  local candidate
  for candidate in tesseract tesseract-ocr; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

#######################################
# Download the image at the given URL, OCR it and deliver the text.
# Globals:   cleanup_paths (written), VISUAL / EDITOR (read in edit mode)
# Arguments: [copy|edit] [lang] url
# Outputs:   recognized text on stdout when no mode is given;
#            diagnostics on stderr
# Returns:   1 on missing URL, missing tesseract, failed download or failed
#            OCR; otherwise the status of the final delivery command
#######################################
main() {
  local mode=print url ocr_bin workdir image outfile
  local -a lang_args=() editor_cmd=()

  case "${1-}" in
    copy | edit)
      mode=$1
      shift
      ;;
  esac

  # A language code is only present when something still follows it (the URL).
  if (( $# > 1 )); then
    lang_args=(-l "$1")
    shift
  fi

  url=${1-}
  if [[ -z "$url" ]]; then
    usage
    return 1
  fi

  if ! ocr_bin=$(find_tesseract); then
    printf '%s\n' 'error: neither tesseract nor tesseract-ocr was found' >&2
    return 1
  fi

  if ! workdir=$(mktemp -d); then
    printf '%s\n' 'error: could not create a temporary directory' >&2
    return 1
  fi
  image="$workdir/tesseract-image"
  outfile="$workdir/tesseract.txt"

  # In edit mode the text file is kept: the editor may fork into the
  # background and still need it after this script has exited.
  if [[ "$mode" == "edit" ]]; then
    cleanup_paths+=("$image")
  else
    cleanup_paths+=("$workdir")
  fi
  trap cleanup EXIT

  if ! wget -q -O "$image" -- "$url"; then
    printf 'error: failed to download %s\n' "$url" >&2
    return 1
  fi

  # stderr is discarded on purpose: tesseract writes banners/warnings there.
  if ! "$ocr_bin" "$image" - "${lang_args[@]}" 2>/dev/null >"$outfile"; then
    printf 'error: %s could not process the image\n' "$ocr_bin" >&2
    return 1
  fi

  case "$mode" in
    copy)
      xclip -i "$outfile"
      ;;
    edit)
      # Split the editor setting into words so values such as
      # "code --wait" keep working.
      read -r -a editor_cmd <<<"${VISUAL:-${EDITOR:-vi}}"
      "${editor_cmd[@]}" "$outfile"
      ;;
    *)
      cat -- "$outfile"
      ;;
  esac
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment