This is a photo hog, written in Perl, used to collect animated GIFs from Reddit. My OAuth information has been suppressed; you can easily get your own through Reddit.
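To run it yourself, register an app (typically the "script" type, since this client logs in with a username and password) under your Reddit account's app preferences at reddit.com/prefs/apps, then fill in the empty constants near the top of the Application package. Roughly, with placeholder values rather than real credentials:

use constant REDDIT_SECRET   => 'the secret of the app you registered';
use constant REDDIT_PASSWORD => 'the password of the Reddit account';

REDDIT_CLIENT_ID and REDDIT_USERNAME are already filled in below and would also be swapped for your own.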
#!/usr/bin/perl -w
# the original Reddit::Client does not pass the time context parameter
# the class below inherits from Reddit::Client so that it can pass the time context parameter
package MyRedditClient {
    use Reddit::Client;
    our @ISA = qw(Reddit::Client);
    # this is the default for the time context parameter
    use constant T_DEFAULT => 'year';
    sub my_fetch_links {
        my ($self, %param) = @_;
        my $query = {};
        my $subreddit = $param{subreddit} || '';
        my $view = $param{view} || $self->SUPER::VIEW_DEFAULT;
        # accept the time context parameter or fall back to the default
        my $t = $param{t} || T_DEFAULT;
        # include the time context parameter in the query string
        $query->{t} = $t;
        $query->{before} = $param{before} if $param{before};
        $query->{after} = $param{after} if $param{after};
        if (exists $param{limit}) { $query->{limit} = $param{limit} || 500; }
        else { $query->{limit} = $self->SUPER::DEFAULT_LIMIT; }
        # NOTE: the line below causes a 404 error on the request, so it is skipped
        # $subreddit = $self->SUPER::subreddit($subreddit);
        my $args = [$view];
        unshift @$args, $subreddit if $subreddit;
        my $result = $self->SUPER::api_json_request(
            api => ($subreddit ? $self->SUPER::API_LINKS_FRONT : $self->SUPER::API_LINKS_OTHER),
            args => $args,
            data => $query,
        );
        return [
            map { Reddit::Client::Link->new($self, $_->{data}) } @{$result->{data}{children}}
        ];
    }
}
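# A rough illustration of what the subclass above changes: with the "top" view and the
# time context passed through, a call such as
#   $client->my_fetch_links(subreddit => 'babyelephantgifs', view => 'top', limit => 100, t => 'year')
# should end up issuing a listing request along the lines of
#   GET /r/babyelephantgifs/top?t=year&limit=100
# (illustrative only; the exact path is assembled from Reddit::Client's internal API table).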
# this is the application package
package Application {
    use Moose;
    use Reddit::Client;
    use Term::ProgressBar::Simple;
    use LWP::Simple;
    use Archive::Tar;
    use Try::Tiny;
    use feature 'say';
    use feature 'state';
    use feature 'switch';
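    # Note: most of these modules are not core Perl; on a fresh system something like
    #   cpanm Moose Reddit::Client Term::ProgressBar::Simple Try::Tiny LWP::Simple
    # (assuming the cpanminus client is available) should pull in the missing dependencies.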
    # below is the application authentication using OAuth for Reddit
    use constant REDDIT_USER_AGENT => 'moar-squee-gifs v0.01 by /u/jwmcveigh';
    use constant REDDIT_CLIENT_ID  => '8j7I89-Th6P2LQ';
    use constant REDDIT_USERNAME   => 'jwmcveigh';
    use constant REDDIT_SUBREDDIT  => 'babyelephantgifs';
    # I have suppressed my authentication information to be safe
    # Feel free to add your own. It should only take a moment
    use constant REDDIT_SECRET   => '';
    use constant REDDIT_PASSWORD => '';
    use namespace::autoclean;
    # this is an instance of my own Reddit::Client subclass
    has '_client' => (
        is       => 'ro',
        isa      => 'MyRedditClient',
        required => 1,
        default  => sub {
            MyRedditClient->new(
                user_agent => REDDIT_USER_AGENT,
                client_id  => REDDIT_CLIENT_ID,
                secret     => REDDIT_SECRET,
                username   => REDDIT_USERNAME,
                password   => REDDIT_PASSWORD,
            )
        },
    );
    # this is the process for downloads
    sub proc {
        my ($self) = @_;
        my @items;
        my $after;
        # this is the first iteration of the fetch_links loop
        # fetch 100 links
        my $posts = $self->_client->my_fetch_links(subreddit => REDDIT_SUBREDDIT, limit => 100, view => Reddit::Client::VIEW_TOP, t => 'year');
        for (@{$posts}) {
            # keep only imgur urls
            push @items, $_ if ($_->{url} =~ m/i\.imgur\.com/);
        }
        print '.';
        # the next fetch_links call will fetch links after the last post returned
        $after = $posts->[-1]->{name};
        # continue to fetch links until no more are available
        while ($#{$posts} >= 1) {
            # fetch 100 links
            $posts = $self->_client->my_fetch_links(subreddit => REDDIT_SUBREDDIT, limit => 100, view => Reddit::Client::VIEW_TOP, after => $after, t => 'year');
            my $new_after;
            my $done = 0;
            try {
                # the next fetch_links call will fetch links after the last post returned
                $new_after = $posts->[-1]->{name};
                if ($new_after eq $after) {
                    # the cursor did not advance, so nothing new was returned
                    $done = 1;
                    return;
                }
                $after = $new_after;
                # keep only imgur urls
                for (@{$posts}) {
                    push @items, $_ if ($_->{url} =~ m/i\.imgur\.com/);
                }
                print '.';
            } catch {
                # no safe test for the last url known to me, so exit the loop on any exception
                say '';
                $done = 1;
            };
            # 'last' cannot be used directly inside try/catch (they are anonymous subs), so flag and exit here
            last if $done;
        }
        # print the number of items found
        say '';
        say "Found " . scalar(@items) . " items";
        say '';
        # this progress bar sure comes in handy
        my $progress = Term::ProgressBar::Simple->new(scalar @items);
        # create a folder to contain the downloads
        mkdir REDDIT_SUBREDDIT unless (-e REDDIT_SUBREDDIT);
        my $idx = 0;
        for (@items) {
            # match the imgur item tag as well as the extension
            next unless $_->{url} =~ m/(\w+)\.(\w+)$/;
            my $tag = $1;
            # watch for GIFV, which is really a web document for a player
            my $ext = "." . $2;
            chop $ext if ($ext =~ m/v$/);
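            # For illustration (hypothetical id): a post url such as http://i.imgur.com/AbCdEfG.gifv
            # yields $tag = 'AbCdEfG' and $ext = '.gif' once the trailing 'v' is chopped;
            # the file itself is then fetched from http://imgur.com/download/AbCdEfG below.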
            # get the item
            my $buf = get("http://imgur.com/download/${tag}");
            if ($buf) {
                # file named after the subreddit, with a 4 digit numeric index and the original extension
                my $tmp_photo_outfile_basename = REDDIT_SUBREDDIT . '-' . sprintf("%04d", $idx) . $ext;
                # save the file to disk
                open my $outfile, '>', REDDIT_SUBREDDIT . "/" . $tmp_photo_outfile_basename
                    or die "Cannot write $tmp_photo_outfile_basename: $!";
                binmode($outfile);
                print {$outfile} $buf;
                close $outfile;
                # this is the 4 digit numeric index
                $idx++;
            }
            # stop after 4 successful downloads
            last if $idx == 4;
            # update the progress bar
            $progress++;
        }
    }
    sub main {
        my ($self) = @_;
        $self->proc;
    }
    __PACKAGE__->meta->make_immutable;
}
# begin
Application->new->main unless caller;
1;