Skip to content

Instantly share code, notes, and snippets.

@pboehm
Last active December 29, 2015 17:19
Show Gist options
  • Save pboehm/7703422 to your computer and use it in GitHub Desktop.
Save pboehm/7703422 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# encoding: utf-8
require 'sinatra'
require 'redis'
require 'json'
require_relative './parser'
# One-time application setup: HTTP Basic authentication for every route and
# the global Redis connection used by the request handlers.
configure do
  # BASIC AUTH
  # Credentials are read from the environment; each falls back to 'naughty'
  # when its variable is not set.
  USER = ENV.fetch('NAUGHTY_USER') { 'naughty' }
  PASSWORD = ENV.fetch('NAUGHTY_PASSWORD') { 'naughty' }

  use Rack::Auth::Basic, "Login to get something naughty!" do |username, password|
    # Constant-time comparison so response timing does not leak how much of a
    # guess was correct. `to_s` guards against a missing password part, and
    # the non-short-circuit `&` makes sure both comparisons always run.
    Rack::Utils.secure_compare(username.to_s, USER) &
      Rack::Utils.secure_compare(password.to_s, PASSWORD)
  end

  # REDIS
  # ENV.fetch raises a KeyError naming the variable when it is missing,
  # instead of the opaque error URI.parse(nil) would produce.
  uri = URI.parse(ENV.fetch('REDISCLOUD_URL'))
  $redis = Redis.new(:host => uri.host, :port => uri.port, :password => uri.password)
end
# Returns up to +max_entries+ decoded entries from the Redis hash 'data',
# highest hash field first.
#
# Fields are ordered by their integer value (so "100" ranks above "9"), with
# plain string order as the tie-breaker for fields that are not numeric.
#
# @param max_entries [Integer] maximum number of entries to return; zero or a
#   negative number yields an empty list
# @return [Array<Hash>] parsed JSON objects, one per stored entry
def get_data(max_entries)
  limit = [max_entries, 0].max
  return [] if limit.zero?

  ordered = $redis.hkeys('data').sort_by { |field| [field.to_i, field] }.reverse
  keys = ordered.first(limit)
  # HMGET needs at least one field, so an empty hash is answered locally.
  return [] if keys.empty?

  # A field deleted between HKEYS and HMGET comes back as nil; drop it rather
  # than handing nil to the parser or the template.
  $redis.hmget('data', keys).compact.map { |payload| JSON.parse(payload) }
end
# GET / or GET /:count
# Renders the index template with the +count+ newest entries. A missing or
# non-numeric count falls back to 200.
get '/?:count?' do
  requested = params[:count].to_s
  # \A and \z anchor the whole string; ^ and $ would accept any input that
  # merely contains one all-digit line (e.g. "abc\n12").
  limit = requested =~ /\A\d+\z/ ? requested.to_i : 200
  @data = get_data(limit)
  erb :index
end
__END__
@@ index
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="//netdna.bootstrapcdn.com/bootstrap/3.0.2/css/bootstrap.min.css" rel="stylesheet">
<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.2/js/bootstrap.min.js"></script>
<title>Naughty</title>
</head>
<body>
<%# Renders one "well" per entry in @data, in the order given. Every value is HTML-escaped before being written into the page, since the entries are scraped from an external site. %>
<div class="container" style="margin-top: 30px;">
<% @data.each do |d| %>
  <div class="well">
    <h3><%= Rack::Utils.escape_html(d["heading"]) %></h3>
    <a target="_blank" href="<%= Rack::Utils.escape_html(d["uploaded"]) %>">
      <img class="img-thumbnail" src="<%= Rack::Utils.escape_html(d["img"]) %>"/>
    </a>
    <br />
    <form class="form-inline" role="form" style="margin-top: 20px;">
      <div class="form-group">
        <div class="col-xs-12">
          <input class="form-control" type="text" value="<%= Rack::Utils.escape_html(d["uploaded"]) %>" onClick="this.select();" /><br />
        </div>
      </div>
    </form>
    <br />
    <%# Release date and categories are only shown when categories are present. %>
    <% if d["categories"] %>
      <small>
        <strong>Released:</strong> <span> <%= Rack::Utils.escape_html(d["released"]) %></span><br/>
        <strong>Categories:</strong> <span> <%= Rack::Utils.escape_html(d["categories"].join(', ')) %></span>
      </small>
    <% end %>
  </div>
  <hr />
<% end %>
</div>
</body>
</html>
# Rack entry point: load the application file from the working directory ...
require './app'
# ... and serve the classic-style Sinatra application it defines.
run Sinatra::Application
# Gem sources and the Ruby version the app is pinned to.
source "https://rubygems.org"
ruby "2.0.0"

# Needed in every environment.
gem "sinatra"
gem "nokogiri"
gem "redis"

# Development-only tooling.
gem "shotgun", :group => :development
gem "pry", :group => :development
gem "pry-debugger", :group => :development

# Production web server.
gem "thin", :group => :production
GEM
remote: https://rubygems.org/
specs:
coderay (1.1.0)
columnize (0.3.6)
daemons (1.1.9)
debugger (1.6.2)
columnize (>= 0.3.1)
debugger-linecache (~> 1.2.0)
debugger-ruby_core_source (~> 1.2.3)
debugger-linecache (1.2.0)
debugger-ruby_core_source (1.2.4)
eventmachine (1.0.3)
method_source (0.8.2)
mini_portile (0.5.1)
nokogiri (1.6.0)
mini_portile (~> 0.5.0)
pry (0.9.12.4)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
pry-debugger (0.2.2)
debugger (~> 1.3)
pry (~> 0.9.10)
rack (1.5.2)
rack-protection (1.5.1)
rack
redis (3.0.6)
shotgun (0.9)
rack (>= 1.0)
sinatra (1.4.4)
rack (~> 1.4)
rack-protection (~> 1.4)
tilt (~> 1.3, >= 1.3.4)
slop (3.4.7)
thin (1.6.1)
daemons (>= 1.0.9)
eventmachine (>= 1.0.0)
rack (>= 1.0.0)
tilt (1.4.1)
PLATFORMS
ruby
DEPENDENCIES
nokogiri
pry
pry-debugger
redis
shotgun
sinatra
thin
require 'nokogiri'
require 'open-uri'
require 'date'
# Scrapes one listing page of naughtyblog.org and collects its posts.
#
# Each post that can be fully extracted is yielded to the block (when one is
# given) and then appended to the result. Posts that cannot be extracted are
# skipped. Network and URL errors are not rescued and propagate to the caller.
#
# @param page [Integer, String] page number inserted into the listing URL
# @yieldparam elem [Hash] the extracted post (see build_post_entry)
# @return [Array<Hash>] all posts extracted from the page, in document order
def get_data_from_page(page)
  data = []
  listing = fetch_html_document("http://www.naughtyblog.org/page/#{ page }")
  listing.css('.post').each do |post|
    elem = build_post_entry(post)
    next unless elem

    yield(elem) if block_given?
    data << elem
  end
  data
end

# Downloads +url+ and parses the response body as HTML.
#
# The URL is opened through URI#open (mixed in by open-uri) instead of
# Kernel#open, so an href taken from a scraped page can never be treated as a
# local file path or a "|command" pipe. Anything that does not parse to an
# openable (http/https/ftp) URI raises instead of being opened.
def fetch_html_document(url)
  Nokogiri::HTML(URI.parse(url).open)
end

# Builds the hash describing a single `.post` node, fetching the post's own
# page for the download links, categories and release date.
#
# Returns nil when the post has no id, no title link, or no parsable
# "Released" date. A missing preview image is tolerated and stored as nil.
def build_post_entry(post)
  heading = post.css('.post-title > a').first
  post_id = post[:id]
  unless heading && heading[:href] && post_id
    warn 'skipping post without id or title link'
    return nil
  end

  subpage_url = heading[:href]
  image = post.css('.post-content img').first
  subdoc = fetch_html_document(subpage_url)

  links = subdoc.css('a[href*="uploaded.net"][title*="uploaded.net"]').map { |l| l[:href] }
  categories = subdoc.css('.post-metadata a[rel="category tag"]').map(&:text)

  begin
    released = Date.parse(subdoc.css('em:contains(Released)').first.text)
  rescue StandardError => e
    # Covers both a missing "Released" element and an unparsable date;
    # StandardError keeps signals and exit requests from being swallowed.
    puts e.message
    return nil
  end

  {
    id: post_id.split(/-/)[1],                 # second dash-separated part of the DOM id
    key: URI(subpage_url).path.delete('/'),    # URL path with all slashes removed
    heading: heading.text,
    subpage: subpage_url,
    img: image && image[:src],
    uploaded: links.join(" "),
    categories: categories,
    released: released.to_s
  }
end
web: bundle exec thin -R config.ru start -p $PORT -e $RACK_ENV
worker: bundle exec rake parse
require 'redis'
require 'json'
require_relative './parser'
# Global Redis connection shared by the tasks below.
# ENV.fetch raises a KeyError naming the variable when REDISCLOUD_URL is not
# set, rather than the opaque error URI.parse(nil) would produce.
uri = URI.parse(ENV.fetch("REDISCLOUD_URL"))
$redis = Redis.new(:host => uri.host, :port => uri.port, :password => uri.password)
# Scrapes listing pages 1 through 9 and stores each post as JSON in the Redis
# hash 'data', keyed by the post id. HSETNX only writes fields that do not
# exist yet; the first post that is already stored ends the whole run.
task :parse do
  catch(:done) do
    1.upto(9) do |page_number|
      get_data_from_page(page_number) do |entry|
        newly_stored = $redis.hsetnx('data', entry[:id], JSON.dump(entry))
        throw :done unless newly_stored
        puts entry[:key]
      end
    end
  end
end
# Removes every key from the currently selected Redis database.
task(:flush) { $redis.flushdb }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment