Created
May 31, 2013 16:10
-
-
Save AtkinsSJ/5686061 to your computer and use it in GitHub Desktop.
Scraper for #1GAM games. Grabs the information from http://www.onegameamonth.com/gameslist and generates a csv file.
Because of how the #1GAM site is set up, this only gets the entries for the current month. Hopefully there will be an official API at some point.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Report every error category except notices.
error_reporting(E_ALL & ~E_NOTICE);

// Listing page that the scraper reads the game entries from.
$url = 'http://www.onegameamonth.com/gameslist';

// Rows collected by addGame(): one associative array per game.
$games = array();
/**
 * Returns the text content of a DOM node, or '' when the node is absent.
 */
function gameNodeText($node) {
    return ($node instanceof DOMNode) ? (string) $node->textContent : '';
}

/**
 * Returns the child of $parent at position $index, or null when $parent is
 * absent or has no child at that position.
 */
function gameChildAt($parent, $index) {
    if (!($parent instanceof DOMNode) || !($parent->childNodes instanceof DOMNodeList)) {
        return null;
    }
    return $parent->childNodes->item($index);
}

/**
 * Returns the text of the first child of the $index-th child of $link,
 * or '' when any node along that path is missing.
 */
function gameFieldText($link, $index) {
    $field = gameChildAt($link, $index);
    return ($field instanceof DOMNode) ? gameNodeText($field->firstChild) : '';
}

/**
 * Extracts one game entry from a "gadiv" element and appends it to the
 * global $games list.
 *
 * The first child of $gameDiv is read as the game link: its href attribute
 * gives the URL and its children at positions 1-5 give name, subtitle,
 * copyright, tags and description. The first child of the last child of
 * $gameDiv is read as the user name, which is used for both the profile and
 * the Twitter URL.
 *
 * Any piece that is missing from the markup becomes an empty string instead
 * of aborting the run, so a single malformed entry cannot stop the scrape.
 *
 * @param DOMNode $gameDiv The element holding one game entry.
 */
function addGame($gameDiv) {
    global $games;

    $ga = $gameDiv->firstChild;
    $gauser = $gameDiv->lastChild;

    // Only element nodes carry an attribute map; text nodes expose null here.
    $href = null;
    if ($ga instanceof DOMNode && $ga->attributes !== null) {
        $href = $ga->attributes->getNamedItem('href');
    }

    $username = gameNodeText(gameChildAt($gauser, 0));

    // Key order matters: rows are written to the CSV positionally and must
    // line up with the header row.
    $games[] = array(
        'url' => gameNodeText($href),
        'name' => gameFieldText($ga, 1),
        'subtitle' => gameFieldText($ga, 2),
        'copyright' => gameFieldText($ga, 3),
        'tags' => gameFieldText($ga, 4),
        'description' => gameFieldText($ga, 5),
        'profile' => 'http://onegameamonth.com/' . $username,
        'twitter' => 'http://twitter.com/' . $username,
    );
}
// Fetch and parse the listing page. Real-world HTML is rarely well-formed, so
// libxml's parse complaints are collected internally instead of being printed
// as PHP warnings; the previous setting is restored afterwards.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$loaded = $dom->loadHTMLFile($url);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);
if ($loaded === false) {
    throw new RuntimeException('Could not load ' . $url);
}

// Every <div> whose class attribute is exactly "gadiv" is treated as one game.
// getAttribute() returns '' for a div without a class attribute, so such divs
// are skipped cleanly.
$divs = $dom->getElementsByTagName('div');
foreach ($divs as $div) {
    if ($div->getAttribute('class') === 'gadiv') {
        addGame($div);
    }
}

// Write the header row followed by one row per collected game.
$file = fopen('1gam.csv', 'w');
if ($file === false) {
    throw new RuntimeException('Could not open 1gam.csv for writing');
}
fputcsv($file, array('url', 'name', 'subtitle', 'copyright', 'tags', 'description', 'profile', 'twitter'));
foreach ($games as $game) {
    fputcsv($file, $game);
}
fclose($file);

echo 'done';
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment