#' Title: Facebook Page API Downloads
#' Author: Taylor Arnold (taylor.arnold@acm.org)
#' Created: 2015-07-28 20:50
#' Updated: 2015-08-15 15:43
#' Description: Cycle over the Facebook API to grab all of the
#'              project page posts, photos, and comments, and
#'              save the results as flat text files.
# Load the required packages
library(curl)     # curl_download
library(jsonlite) # fromJSON
library(plyr)     # rbind.fill

# Fill in the access token and page id; adjust the output paths as needed
access_token = ""
page_id = ""
output_path_posts = "~/Desktop/facebook_api_output_posts.csv"
output_path_photo = "~/Desktop/facebook_api_output_photos.csv"
output_path_posts_comments = "~/Desktop/facebook_api_output_posts_comments.csv"
output_path_photo_comments = "~/Desktop/facebook_api_output_photo_comments.csv"
output_dir_photo_files = "~/Desktop/img"
# Then, run the remainder of the file all at once; that's it!
dataOutput = NULL
url_start = "https://graph.facebook.com/v2.4/"
url_end = "/posts?limit=50"
since_str = ""
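# (Optional) restrict results by date: the Graph API accepts a 'since'
# parameter on these endpoints; the date below is purely illustrative, e.g.
# since_str = "&since=2015-01-01"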
url = paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
# Page through the posts endpoint until no data remains
while (TRUE) {
  curl_download(url, t <- tempfile())
  l = fromJSON(t)
  if (length(l$data) == 0L) break
  dataOutput = rbind.fill(dataOutput, l$data)
  url = l$paging$`next`
  print(nrow(dataOutput))   # progress
  if (is.null(url)) break   # no further pages
}
# Strip embedded newlines and tabs so each post stays on a single line
dataOutput$message <- gsub('\n', ' ', dataOutput$message)
dataOutput$story <- gsub('\n', ' ', dataOutput$story)
dataOutput$message <- gsub('\t', ' ', dataOutput$message)
dataOutput$story <- gsub('\t', ' ', dataOutput$story)
write.table(dataOutput, output_path_posts, row.names=FALSE)
# Now, repeat for photos
dataOutput = NULL
url_start = "https://graph.facebook.com/v2.4/"
url_end = "/photos?fields=id,created_time,link,name,images&type=uploaded"
since_str = ""
url = paste0(url_start, page_id, url_end, "&access_token=", access_token, since_str)
while (TRUE) {
  curl_download(url, t <- tempfile())
  l = fromJSON(t)
  dataOutput = rbind.fill(dataOutput, l$data)
  url = l$paging$`next`
  print(nrow(dataOutput))   # progress
  if (is.null(url)) break   # no further pages
}
# Keep the first source URL for each photo (the images field is a nested list)
url <- sapply(lapply(dataOutput$images, function(v) v$source), function(v) v[1])
dataOutput <- dataOutput[, -5]   # drop the nested images column
dataOutput$url = url
# Remove newlines, carriage returns, and tabs from every column
for (i in 1:ncol(dataOutput)) {
  dataOutput[,i] = gsub('\n', ' ', dataOutput[,i])
  dataOutput[,i] = gsub('\r', ' ', dataOutput[,i])
  dataOutput[,i] = gsub('\t', ' ', dataOutput[,i])
}
dataOutput <- dataOutput[, -4]   # drop the name column before writing
write.table(dataOutput, output_path_photo, row.names=FALSE, sep="\t")
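# (Optional) a quick sanity check that the photo table wrote cleanly;
# purely illustrative and not required for the rest of the script:
# head(read.table(output_path_photo, header=TRUE, sep="\t", as.is=TRUE))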
# grab comments from posts:
posts <- read.table(output_path_posts, as.is=TRUE, header=TRUE)$id
url_end = "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
for (i in seq_along(posts)) {
  url = paste0(url_start, posts[i], url_end, "&access_token=", access_token, since_str)
  curl_download(url, t <- tempfile())
  l = fromJSON(t)$data
  if (any(names(l) == "from")) {
    # flatten the nested 'from' record to just the commenter's name
    l$from <- l$from$name
    names(l)[names(l) == "from"] = "commenter_name"
  }
  if (length(l)) dataOutput = rbind.fill(dataOutput, l)
  print(nrow(dataOutput))   # progress
}
write.table(dataOutput[, c("id", "commenter_name", "message", "created_time")],
            output_path_posts_comments, row.names=FALSE, sep="\t")
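# (Optional) a quick look at the most active commenters; purely illustrative:
# cm <- read.table(output_path_posts_comments, header=TRUE, sep="\t", as.is=TRUE)
# head(sort(table(cm$commenter_name), decreasing=TRUE))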
# grab comments from photos:
photos <- read.table(output_path_photo, header=TRUE, sep="\t",
                     colClasses="character")$id   # read ids as text so long numeric ids keep full precision
photos <- paste0(page_id, "_", photos)   # prefix the page id to form the object id
url_end = "/comments?fields=from,message,created_time,id"
dataOutput <- NULL
for (i in seq_along(photos)) {
  url = paste0(url_start, photos[i], url_end, "&access_token=", access_token, since_str)
  curl_download(url, t <- tempfile())
  l = fromJSON(t)$data
  if (any(names(l) == "from")) {
    # flatten the nested 'from' record to just the commenter's name
    l$from <- l$from$name
    names(l)[names(l) == "from"] = "commenter_name"
  }
  if (length(l)) dataOutput = rbind.fill(dataOutput, l)
  print(nrow(dataOutput))   # progress
}
write.table(dataOutput[, c("id", "commenter_name", "message", "created_time")],
            output_path_photo_comments, row.names=FALSE, sep="\t")
# download all of the photos
dir.create(output_dir_photo_files, showWarnings=FALSE)
photos <- read.table(output_path_photo, as.is=TRUE, header=TRUE, sep="\t")$url
for (i in seq_along(photos)) {
  out <- basename(photos[i])
  out <- substr(out, 1, regexpr(".jpg", out, fixed=TRUE) + 3L)   # trim anything after ".jpg"
  curl_download(photos[i], paste0(output_dir_photo_files, "/", out))
}
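# A minimal optional variant, assuming some image URLs may fail: wrapping the
# download in tryCatch lets the loop continue past bad links. The message text
# is illustrative; uncomment to use in place of the loop above.
# for (i in seq_along(photos)) {
#   out <- basename(photos[i])
#   out <- substr(out, 1, regexpr(".jpg", out, fixed=TRUE) + 3L)
#   tryCatch(curl_download(photos[i], paste0(output_dir_photo_files, "/", out)),
#            error = function(e) message("skipping ", photos[i]))
# }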