|
# Downloads a Radio 1 episode with rtmpdump and re-encodes it with ffmpeg.
#
# Requires rtmpdump and ffmpeg to be available on the PATH.
#
# Props to http://stream-recorder.com/forum/showpost.php?p=30885&postcount=57
|
|
|
# Pulls the programme ID (capture 2) out of a ".../programmes/<id>" or
# ".../episode/<id>" URL; capture 1 is the path keyword that matched.
REGEX_PARSE = %r{(programmes|episode)/([A-Za-z0-9]*)/?}

# Picks the numeric bitrate (capture 1) out of ffmpeg's stream report.
REGEX_FFMPEG = /bitrate: ([0-9]*)/

# Any run of characters that is not a letter, digit or hyphen; such runs are
# replaced when the programme title is turned into a file name.
REGEX_BAD_TITLE = /[^A-Za-z0-9-]+/

# "latest:<show url>" argument form; capture 1 is the show URL.
# Anchored with \A so the prefix must start the whole argument, not just a line.
REGEX_LATEST = /\Alatest:(.*)/
|
|
|
require 'net/http' |
|
require 'uri' |
|
require 'nokogiri' |
|
require 'tempfile' |
|
require 'open3' |
|
require 'cgi' |
|
|
|
# check that the external tools (rtmpdump, ffmpeg) can be located
|
|
|
# Locates an executable on the current PATH.
#
# Walks every directory listed in ENV['PATH'] and, for each one, tries the
# name with every extension listed in ENV['PATHEXT'] (semicolon-separated).
# When PATHEXT is unset only the bare name is tried.
#
# Props to mislav @ http://stackoverflow.com/a/5471032/497646
#
# @param exec [String] name of the program to look for, e.g. "ffmpeg"
# @return [String, nil] path of the first match, or nil when nothing matches
def find_executable(exec)
  extensions = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']

  # An unset PATH leaves nowhere to search; report "not found" instead of
  # failing on nil.
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |dir|
    extensions.each do |ext|
      candidate = "#{dir}#{File::SEPARATOR}#{exec}#{ext}"
      # File.executable? is also true for searchable directories, so a
      # directory that happens to share the tool's name must be ruled out.
      return candidate if File.executable?(candidate) && !File.directory?(candidate)
    end
  end

  nil
end
|
|
|
# ---------------------------------------------------------------------------
# Main script.
#
# Arguments (positional):
#   1. programme reference (required): a URL containing "programmes/<id>" or
#      "episode/<id>", a bare programme ID, or "latest:<show page URL>" to
#      scrape the show page for its latest episode link
#   2. output (optional): a file path when it has an extension, otherwise a
#      directory in which "<sanitised title>.m4a" is created; defaults to the
#      current working directory
#   3. bitrate (optional): bitrate of the AAC stream to prefer; when absent or
#      not offered, the highest available bitrate is used
#
# External commands are always started with an argument list (never through a
# shell) because the title, summary and stream parameters come from remote XML.
# ---------------------------------------------------------------------------

# both external tools must be present before any network traffic happens
rtmpdump_bin = find_executable "rtmpdump"
raise "Could not locate rtmpdump" if rtmpdump_bin.nil?

ffmpeg_bin = find_executable "ffmpeg"
raise "Could not locate ffmpeg" if ffmpeg_bin.nil?

# by default assume we are getting a http://www.bbc.co.uk/programmes/XXXXXXXX address
input_ref = ARGV[0]
output = ARGV[1]
target_bitrate = ARGV[2]

raise "An argument for the URL or programme ID must be provided" if input_ref.nil?

output = Dir.pwd if output.nil? || output.empty?

latest_match = input_ref.match(REGEX_LATEST)
if latest_match
  # okay user wants the latest from the show page
  puts "Scraping webpage for latest link"

  show_response = Net::HTTP.get_response(URI.parse(latest_match[1]))
  show_obj = Nokogiri::HTML(show_response.body)
  show_obj.remove_namespaces!

  # start at the "Latest episode" heading and walk its following siblings
  # until one carries an href attribute
  ref_point = show_obj.xpath("//h2[contains(text(),'Latest episode')]").first
  ref_point = ref_point.next_element while ref_point && ref_point['href'].nil?

  # when no link is found, input_ref keeps its original value and is parsed below
  unless ref_point.nil?
    input_ref = ref_point['href']
    puts "Using #{input_ref} as latest link"
  end
end

# anything that does not look like a programmes/episode URL is taken to be the ID itself
parsed = input_ref.match(REGEX_PARSE)
programme_id = parsed ? parsed[2] : input_ref

puts "Obtaining programme '#{programme_id}'"

# fetch the playlist
playlist_url = URI.parse "http://www.bbc.co.uk/iplayer/playlist/#{programme_id}"
playlist_response = Net::HTTP.get_response playlist_url
playlist_obj = Nokogiri::XML playlist_response.body
playlist_obj.remove_namespaces!

item_ref = playlist_obj.xpath("//playlist/item").first
raise "Could not find a playlist item for programme '#{programme_id}'" if item_ref.nil?
media_id = item_ref['identifier']

# get the programme title (needed for the output file name, so it is mandatory)
title_ref = playlist_obj.xpath("//playlist/title").first
raise "Could not find a title for programme '#{programme_id}'" if title_ref.nil?
title = title_ref.text
puts "\t#{title}"

# get the summary (only used as metadata, so a missing node is tolerated)
summary_ref = playlist_obj.xpath("//playlist/summary").first
summary = summary_ref.nil? ? "" : summary_ref.text
puts "\t#{summary}"

# get the DJ/show (only used as metadata, so a missing node is tolerated)
site_ref = playlist_obj.xpath("//playlist/item/passionSite").first
site = site_ref.nil? ? "" : site_ref.text
puts "\tPart of #{site} show"

# build the file for sanity
output_file =
  if File.extname(output).empty?
    File.join(output, "#{title.gsub(REGEX_BAD_TITLE, '-')}.m4a")
  else
    output
  end
raise "File already downloaded" if File.exist?(output_file)

# obtain data from metadata service
meta_data_url = URI.parse "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/transferformat/plain/vpid/#{media_id}"
document_response = Net::HTTP.get_response meta_data_url
media_obj = Nokogiri::XML document_response.body
media_obj.remove_namespaces!

# find the AAC RTMP streams
streams = []
target = nil

media_obj.xpath("//mediaSelection/media[@encoding='aac']").each do |element|
  connection_object = element.xpath("connection").first
  next if connection_object.nil?

  data = {
    :bitrate     => element['bitrate'].to_s,
    :application => connection_object['application'].to_s,
    # NOTE(review): SOURCE had gsub("&", "&"), which is a no-op and looks like
    # an HTML-decoded "&amp;"; the presumable intent is restored here - confirm.
    :string      => connection_object['authString'].to_s.gsub("&amp;", "&"),
    :identifier  => connection_object['identifier'].to_s,
    :server      => connection_object['server'].to_s
  }

  # stop at the first stream with the requested bitrate; without a request,
  # never treat a stream lacking a bitrate attribute as a match
  if target_bitrate && target_bitrate == data[:bitrate]
    target = data
    break
  end

  streams << data
end

# fall back to the highest bitrate on offer
if target.nil?
  puts "Finding the highest bitrate"
  target = streams.max_by { |stream| stream[:bitrate].to_i }
end

raise "No AAC streams were found for programme '#{programme_id}'" if target.nil?

puts "Using #{target[:bitrate]}"

# allocate the file for downloading
file = Tempfile.new(["rtmpdump", ".flv"])

begin
  # rtmpdump writes to the path itself; only the reserved name is needed here
  file.close

  # build the command
  application_ref = "#{target[:application]}?#{target[:string]}"
  dump_command = [
    rtmpdump_bin,
    "-r", "rtmp://#{target[:server]}:1935/#{application_ref}",
    "-a", application_ref,
    "-y", target[:identifier],
    "-o", file.path
  ]

  puts "Downloading to #{file.path}"
  system(*dump_command)

  # check the download before proceeding; Tempfile has already created the
  # file, so an empty file is what a failed download looks like
  raise "Couldn't locate the file (#{file.path})" unless File.exist?(file.path)
  raise "rtmpdump produced no data (#{file.path})" if File.zero?(file.path)

  # query what bitrate we can achieve: run ffmpeg on the input alone and look
  # for the bitrate in its report, stderr first and stdout as a fallback
  bitrate = 128
  probe_out, probe_err, = Open3.capture3(ffmpeg_bin, "-i", file.path)
  probe_match = REGEX_FFMPEG.match(probe_err) || REGEX_FFMPEG.match(probe_out)
  if probe_match
    probed = probe_match[1].to_i
    # an empty capture converts to 0; keep the default in that case
    bitrate = probed if probed > 0
  end

  system(
    ffmpeg_bin, "-i", file.path,
    "-metadata", "artist=#{site}",
    "-metadata", "title=#{title}",
    "-metadata", "comments=#{summary}",
    "-acodec", "libfaac",
    "-ab", "#{bitrate}k",
    "-ar", "44100",
    "-ac", "2",
    output_file
  )
ensure
  # always remove the temporary download, even when a step above raised
  file.close!
end