Last active
August 18, 2019 07:41
-
-
Save sardisson/e50286e17c8f2f39bfec9429ed5f46cd to your computer and use it in GitHub Desktop.
AppleScript to create a thumbnail from the first image present in a Micro.blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* | |
* Thumbnail image from Micro.blog post | |
* Creates a thumbnail from the first image present in a given Micro.blog post | |
* v1.0.7 | |
* 2019-08-18 | |
* https://gist.github.com/sardisson/e50286e17c8f2f39bfec9429ed5f46cd | |
*) | |
(*
 * Entry point.
 *  1. Picks the browser to drive: the frontmost app if it is Safari/WebKit/STP/Chrome,
 *     otherwise the default web browser; aborts if neither is supported.
 *  2. Asks for a return-delimited list of Micro.blog post URLs.
 *  3. Opens each post in the browser, reads the first non-emoji image's src and alt
 *     via JavaScript, and appends a linked 150px thumbnail to an HTML document.
 *  4. Shrinks the thumbnails when there are many of them, then opens the document
 *     in the browser as a data: URI.
 * Posts that are not Micro.blog URLs, have no ID, or yield no image are skipped.
 *)
on run
	-- gruber's get browser routine, with a twist to exclude non-Apple/Google browsers
	set supportedBrowsers to {"Safari", "WebKit", "Safari Technology Preview", "Chrome", "Google Chrome"}
	set _browser to GetCurrentApp()
	if _browser is not in supportedBrowsers then
		set _browser to GetDefaultWebBrowser()
		if _browser is not in supportedBrowsers then
			display dialog "Sorry, this script only works with Safari and Chrome" with icon stop buttons {"Cancel"} default button 1
			error number -128
		end if
	end if
	-- the process short name is "Chrome", but the application is addressed as "Google Chrome";
	-- normalise so the final `tell application _browser` targets a resolvable application name
	if _browser is "Chrome" then set _browser to "Google Chrome"
	
	-- error number used purely as a "continue" signal inside the per-post try block
	set skipPostErrNum to 9999
	-- longest time (seconds) to wait for Safari to expose the image before giving up on a post
	set maxWaitSeconds to 15
	
	-- set up the HTML skeleton in variables
	set theHTML to "<html><head><title>Photos from Micro.blog</title><meta charset='utf-8'></head><body>"
	set theHTMLend to "</body></html>"
	
	-- takes a return-delimited list of URLs
	set inputText to text returned of (display dialog "Please enter a return-delimited list of URLs of Micro.blog posts to thumbnail" default answer "" with icon note) as string
	if inputText is "" then error number -128
	
	-- split the input into a List by paragraph
	-- this should never fail, because it's either "" (handled above) or at least 1 paragraph
	set postList to every paragraph of inputText
	
	-- number of thumbnails actually written; drives the thumbnail size chosen at the end
	-- (bogus URLs are filtered out inside the loop, so the count is only known afterwards)
	set theCount to 0
	
	-- handle each post URL
	repeat with thePost in postList
		try -- using a try block to fake "continue" -- https://stackoverflow.com/a/6007211
			-- dereference the loop variable so we work with plain text, not a list-item reference
			set postURL to (contents of thePost) as string
			set theImg to missing value
			set theAlt to ""
			
			-- skip any non-Micro.blog URLs in the list
			if postURL does not start with "https://micro.blog/" then error "skip" number skipPostErrNum
			
			-- get the id of the Micro.blog post to construct the HTML div id to look in for the image
			set theTIDs to AppleScript's text item delimiters
			set AppleScript's text item delimiters to "/"
			try
				set thePostID to last text item of postURL as string
				set AppleScript's text item delimiters to theTIDs
			on error theErr number errNum
				-- always restore the delimiters, then report the real failure
				set AppleScript's text item delimiters to theTIDs
				error theErr number errNum
			end try
			-- a URL ending in "/" has no post id to look up
			if thePostID is "" then error "skip" number skipPostErrNum
			
			-- JavaScript snippets run in the post's tab: first image that is not a WP emoji, its alt, and the page title
			set jsImgSrcCommand to "document.getElementById('post_div_" & thePostID & "').getElementsByClassName('post_content')[0].querySelector('img:not(.wp-smiley)').src;"
			set jsImgAltCommand to "document.getElementById('post_div_" & thePostID & "').getElementsByClassName('post_content')[0].querySelector('img:not(.wp-smiley)').getAttribute('alt');"
			-- if alt is empty, grab the post text/title in order to have a fallback in at least some cases; long strings truncated later
			set jsTitleCommand to "document.getElementsByTagName('title')[0].textContent.trim()"
			
			-- fetch image URL from Google Chrome
			if _browser is in {"Chrome", "Google Chrome"} then
				tell application "Google Chrome"
					open location postURL
					-- https://apple.stackexchange.com/q/343624
					repeat until (loading of active tab of front window is false)
						delay 0.25
					end repeat
					tell (active tab of front window) to set theImg to execute javascript jsImgSrcCommand
					if theImg is not missing value then
						tell (active tab of front window) to set theAlt to execute javascript jsImgAltCommand
						-- filter out bogus alt and set to blank to force fetching post content
						-- (missing value is tested first so the text comparison only ever sees text)
						if theAlt is missing value then
							set theAlt to ""
						else if theAlt starts with "mp-photo-alt[]=" then
							set theAlt to ""
						end if
						-- if there's no alt, make one up from the post content (as reflected in the page title)
						if theAlt = "" then tell (active tab of front window) to set theAlt to my truncateTitle(execute javascript jsTitleCommand)
					end if
					-- close the tab whether or not an image was found
					close active tab of front window
				end tell
			end if
			-- end Google Chrome
			
			-- fetch image URL from Apple's WebKit browsers
			if _browser is in {"Safari", "WebKit", "Safari Technology Preview"} then
				using terms from application "Safari"
					tell application _browser
						open location postURL
						tell current tab of front window
							-- poll for the image src itself rather than for document source;
							-- give up after maxWaitSeconds so a post without images cannot hang the script
							set waitedSeconds to 0
							repeat
								set theImg to do JavaScript jsImgSrcCommand
								if theImg is not missing value then exit repeat
								if waitedSeconds >= maxWaitSeconds then exit repeat
								delay 0.25 -- allow a bit of time to breathe
								set waitedSeconds to waitedSeconds + 0.25
							end repeat
							if theImg is not missing value then
								set theAlt to do JavaScript jsImgAltCommand
								-- filter out bogus alt and set to blank to force fetching post content
								if theAlt is missing value then
									set theAlt to ""
								else if theAlt starts with "mp-photo-alt[]=" then
									set theAlt to ""
								end if
								-- if there's no alt, make one up from the post content (as reflected in the page title)
								if theAlt = "" then set theAlt to my truncateTitle(do JavaScript jsTitleCommand)
							end if
						end tell
						-- close the tab whether or not an image was found
						close current tab of front window
					end tell
				end using terms from
			end if
			--end Apple's WebKit browsers
			
			-- no usable image in this post: move on to the next one
			if theImg is missing value then error "skip" number skipPostErrNum
			set theImg to theImg as string
			
			-- strip any previous photos.m.b url segment (inline thumbs)
			-- so far always 29 chars, https://photos.micro.blog/50/
			if theImg starts with "https://photos.micro.blog/" then
				try -- just in case the URL is shorter than expected; keep it unchanged then
					set theImg to text 30 thru -1 of theImg
				end try
			end if
			
			-- create Micro.blog photo thumbnail URL
			set newImg to "https://photos.micro.blog/150/" & theImg
			
			-- and HTML image thumbnail and link; every value is escaped because it lands inside a quoted attribute
			set theHTML to theHTML & return & "<a href='" & my escapeHTMLAttribute(postURL) & "'><img src='" & my escapeHTMLAttribute(newImg) & "' alt='" & my escapeHTMLAttribute(theAlt) & "'></a>"
			set theCount to theCount + 1
		on error theErr number errNum
			-- the skip signal is silent; anything else is reported (number coerced first so "&" builds text, not a list)
			if errNum is not skipPostErrNum then display dialog (errNum as text) & ": " & (theErr as text)
		end try
	end repeat
	
	--finish HTML document skeleton
	set theHTML to theHTML & return & theHTMLend
	
	--adjust thumbnail size based on number of photos (& my window width)
	--using else if, the first branch that evaluates to true will be executed
	set replSize to ""
	if theCount > 132 then
		set replSize to "/90/"
	else if theCount > 110 then
		set replSize to "/95/"
	else if theCount > 100 then
		set replSize to "/100/"
	else if theCount > 90 then
		set replSize to "/105/"
	else if theCount > 81 then -- 72+ is slightly cut-off, but useable
		set replSize to "/110/"
	else if theCount > 49 then
		set replSize to "/120/"
	end if
	if replSize is not "" then set theHTML to my replaceThumbSize(theHTML, replSize)
	
	-- and load it in the browser using data: URI
	-- "%" and "#" are significant inside a URI (escape introducer / fragment start), so encode them;
	-- "%" must be done first so the "%23" produced for "#" is not re-encoded
	set uriBody to my replaceAllText(theHTML, "%", "%25")
	set uriBody to my replaceAllText(uriBody, "#", "%23")
	tell application _browser to open location "data:text/html;charset=utf-8," & uriBody
	-- XXX provide an option to save this to a file (where the returns will be visible)
end run

-- replace every occurrence of searchText in theText with replacementText (text item delimiters are restored)
on replaceAllText(theText, searchText, replacementText)
	set savedTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to searchText
		set theParts to text items of (theText as text)
		set AppleScript's text item delimiters to replacementText
		set theResult to theParts as text
		set AppleScript's text item delimiters to savedTIDs
		return theResult
	on error theErr number errNum
		set AppleScript's text item delimiters to savedTIDs
		error theErr number errNum
	end try
end replaceAllText

-- escape text so it can sit safely inside a quoted HTML attribute value
on escapeHTMLAttribute(theText)
	-- "&" first, otherwise the entities added below would be escaped again
	set theText to replaceAllText(theText, "&", "&amp;")
	set theText to replaceAllText(theText, "<", "&lt;")
	set theText to replaceAllText(theText, ">", "&gt;")
	set theText to replaceAllText(theText, "\"", "&quot;")
	set theText to replaceAllText(theText, "'", "&#39;")
	return theText
end escapeHTMLAttribute
-- https://daringfireball.net/2009/01/applescripts_targetting_safari_or_webkit | |
on GetCurrentApp()
	-- Returns the short name System Events reports for the process that is currently frontmost
	tell application "System Events"
		set frontProcess to first process whose frontmost is true
		return short name of frontProcess
	end tell
end GetCurrentApp
-- XXX is there a 64-bit-compliant version of this? | |
on GetDefaultWebBrowser()
	-- Returns the name of the application registered to open http URLs,
	-- or "" when it cannot be determined (callers compare the name against a list of supported browsers).
	
	-- Legacy path, kept first so systems where it works behave exactly as before.
	-- First line of _scpt is a workaround for Snow Leopard issues
	-- with 32-bit Mac:: Carbon modules
	set _scpt to "export VERSIONER_PERL_PREFER_32_BIT=yes; " & ¬
		"perl -MMac::InternetConfig -le " & ¬
		"'print +(GetICHelper \"http\")[1]'"
	try
		set legacyName to do shell script _scpt
		if legacyName is not "" then return legacyName
	on error
		-- the Perl module is unavailable (the shell command exits non-zero); fall through
	end try
	
	-- Fallback: ask NSWorkspace (via JavaScript for Automation) which application opens an http URL
	-- and reduce its bundle path to the bare application name, e.g. "Safari" or "Google Chrome"
	set _jxa to "ObjC.import('AppKit'); " & ¬
		"var appURL = $.NSWorkspace.sharedWorkspace.URLForApplicationToOpenURL($.NSURL.URLWithString('http://example.com')); " & ¬
		"appURL.isNil() ? '' : ObjC.unwrap(appURL.lastPathComponent.stringByDeletingPathExtension)"
	try
		return do shell script "/usr/bin/osascript -l JavaScript -e " & quoted form of _jxa
	on error
		return ""
	end try
end GetDefaultWebBrowser
-- truncate page title into something vaguely usable as alt text | |
on truncateTitle(theAlt)
	-- Turns a page title into alt text: at most the first ten words (followed by "….")
	-- or a stock description when there is no title text at all.
	
	-- the JavaScript bridge can hand back missing value instead of text; treat that as an empty title
	if theAlt is missing value then set theAlt to ""
	set theAlt to theAlt as text
	
	-- keep everything from the start of word 1 to the end of word 10, punctuation in between included
	if (count words of theAlt) > 10 then set theAlt to ((text from word 1 to word 10 of theAlt) & "….") as string
	
	-- if page title is empty (there was no text with the post, only a photo), add a last-ditch alt string
	if theAlt = "" then set theAlt to "Image from Micro.blog; description unavailable."
	return theAlt
end truncateTitle
on replaceThumbSize(theText, theSize)
	-- Rewrites Micro.blog thumbnail URLs in theText from the 150px size to theSize.
	-- theSize is the slash-wrapped size segment, e.g. "/90/".
	-- Only "/150/" directly after the photos.micro.blog host is replaced, so a "/150/" that is
	-- part of an original image path, a post link or alt text is left alone.
	-- Returns theText unchanged if anything goes wrong.
	set thumbHost to "https://photos.micro.blog"
	set savedTIDs to AppleScript's text item delimiters
	try
		-- split on the old thumbnail prefix, then re-join with the new one
		set AppleScript's text item delimiters to thumbHost & "/150/"
		set theStrings to text items of theText
		set AppleScript's text item delimiters to thumbHost & theSize
		set resizedText to theStrings as string
		set AppleScript's text item delimiters to savedTIDs
		return resizedText
	on error theErr number errNum
		-- restore the delimiters and fall back to the original text
		set AppleScript's text item delimiters to savedTIDs
		return theText
	end try
end replaceThumbSize
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment