Created
April 3, 2017 02:28
-
-
Save SarahTaylorProject/3f3b1a98eda558c97e5bea8ede14f685 to your computer and use it in GitHub Desktop.
Working version of movie script that handles some errors well, and produces useful outputs (it is still vulnerable to international characters and to duplicate movie names though)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# April 3rd 2017 | |
# This version of testing the Open Movie Database API is working well but falls down with large numbers (possibly because the API deliberately throttles our requests after a certain threshold) | |
# The other vulnerability is in international characters - it is definitely writing them out in a strange and unusable fashion
# BUT this does work: it outputs a list of results without splitting the strings (into "movie_api_result_unprocessed.csv") | |
# AND it writes a separate file each for writers and directors, with very good handling of the unusual text within these fields (e.g. commas, parentheses) | |
require "csv" | |
require "json" | |
require "net/http" | |
# Looks up each movie listed in movies.csv through the Open Movie Database
# (OMDb) API and writes three CSV reports:
#   1. movie_api_result_unprocessed.csv - one row per input movie, with the
#      director and writer fields exactly as the API returned them
#   2. movie_api_result_writers.csv     - one row per individual writer
#   3. movie_api_result_directors.csv   - one row per individual director
#
# NOTE(review): the author's notes say large runs may be throttled by the API;
# no rate limiting is attempted here.

INPUT_FILE = "movies.csv"
MAX_MOVIES = 500 # upper bound on the number of lookups made in one run
OMDB_ENDPOINT = "http://www.omdbapi.com/"

# Fetches the OMDb record for a single title.
#
# The request is made with Net::HTTP rather than by shelling out to curl, so
# the title is never interpreted by a shell, and the query string is built with
# URI.encode_www_form so spaces, "&", "#" and non-ASCII characters are escaped.
#
# @param title [String, nil] movie title to search for
# @return [Hash] the parsed JSON object, or {} when the title is blank, the
#   request fails, or the response is not a JSON object
def fetch_movie_data(title)
  raise ArgumentError, "blank title" if title.to_s.strip.empty?

  uri = URI(OMDB_ENDPOINT)
  uri.query = URI.encode_www_form(t: title)
  data = JSON.parse(Net::HTTP.get(uri))
  data.is_a?(Hash) ? data : {}
rescue StandardError
  # StandardError (not Exception) so Ctrl-C and exit still stop the script.
  puts "Error getting API results from #{title}"
  {}
end

# Splits a combined credit field into individual names.
#
# Parenthesised annotations such as " (screenplay)" are removed BEFORE
# splitting on ", ", so a comma inside an annotation cannot cut a name in half.
#
# @param field [String, nil] combined field, e.g. "A (story), B (screenplay)"
# @return [Array<String>] individual names; empty when field is nil or "N/A"
def split_names(field)
  return [] if field.nil? || field == "N/A"

  field.gsub(/ \([^\)]+\)/, "").split(", ")
end

# Writes output file 1: the API results combined, without splitting any field.
#
# @param path [String] destination CSV path
# @param movie_api_results [Array<Array>] [id, title, data] triples
def write_unprocessed_csv(path, movie_api_results)
  CSV.open(path, "w") do |csv|
    csv << ["input_movie_id", "input_movie_title", "Movie_Title_Match", "imdbID", "All_Directors", "All_Writers"]
    movie_api_results.each do |id, title, movie_api_data|
      csv << [id, title, movie_api_data["Title"], movie_api_data["imdbID"],
              movie_api_data["Director"], movie_api_data["Writer"]]
    end
  end
end

# Writes one row per individual person for the given role. Movies whose field
# for that role is missing or "N/A" contribute no rows.
#
# @param path [String] destination CSV path
# @param movie_api_results [Array<Array>] [id, title, data] triples
# @param role [String] key in the API data, "Writer" or "Director"; also used
#   to build the "All_<role>s" and "Individual_<role>" column headers
# @param echo [Boolean] when true, also print each individual name to stdout
def write_people_csv(path, movie_api_results, role, echo: false)
  CSV.open(path, "w") do |csv|
    csv << ["input_movie_id", "input_movie_title", "Movie_Title_Match", "All_#{role}s", "Individual_#{role}"]
    movie_api_results.each do |id, title, movie_api_data|
      split_names(movie_api_data[role]).each do |person|
        puts person if echo
        csv << [id, title, movie_api_data["Title"], movie_api_data[role], person]
      end
    end
  end
end

puts "START MOVIE TEST ******"

if File.exist?(INPUT_FILE)
  # Only the first two columns (id, title) are used; exact duplicate pairs are dropped.
  input_movies = CSV.read(INPUT_FILE).map { |row| [row[0], row[1]] }.uniq

  movie_api_results = input_movies.first(MAX_MOVIES).map do |id, title|
    [id, title, fetch_movie_data(title)]
  end

  # output file 1: api results COMBINED without splitting
  write_unprocessed_csv("movie_api_result_unprocessed.csv", movie_api_results)

  # output file 2: api results for WRITERS, split into separate rows
  # (each writer name is also echoed to stdout, as the script has always done)
  write_people_csv("movie_api_result_writers.csv", movie_api_results, "Writer", echo: true)

  # output file 3: api results for DIRECTORS, split into separate rows
  write_people_csv("movie_api_result_directors.csv", movie_api_results, "Director")
else
  # Leave any previous output files untouched when there is nothing to read.
  warn "Input file #{INPUT_FILE} not found; no output written"
end

puts "END MOVIE TEST ******"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This version was used to produce test outputs
Next stage is handling duplicate movies and international characters