Last active
August 29, 2015 14:04
-
-
Save abrahaj/981f2f8793c0c4027a6c to your computer and use it in GitHub Desktop.
phpDom
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Scraping library used below: phpQuery, https://code.google.com/p/phpquery/
ini_set('display_errors', '1');
require 'phpQuery.php';
// Project configuration, included in case a database connection is needed.
require_once '../v1/sysconfig.php';

// The article URL could also be passed in as a parameter.
$url = "http://www.ata.gov.al/ekon-leku-i-qendrueshem-kundrejt-valutave-kryesore-59843.html";
$rssUrl = "http://www.ata.gov.al/rss";

// print_r(retrieveArticleLinks($rssUrl));
// Scrape the single article above and dump the extracted fields.
print_r(fetchData($url));
/**
 * Download an article page and extract its fields into an associative array.
 *
 * The page is fetched with connect() and parsed with phpQuery. Title, author,
 * date, body and image are read from inside the page's <article> element.
 *
 * @param string $url Address of the article page to scrape.
 * @return array Associative array with the keys "title", "author", "date",
 *               "body", "image", "category" and "url" ("url" is the argument
 *               passed in, unchanged).
 */
function fetchData($url) {
    $dom = phpQuery::newDocumentHTML(connect($url));

    // Select the <article> element once and reuse it for every field instead
    // of re-running the same query for each one.
    $articleDom = $dom->find("article");

    $article = array();
    $article["title"] = $articleDom->find("h1")->html();
    $article["author"] = $articleDom->find("span")->find("a")->html();
    $article["date"] = $articleDom->find("time")->html();
    $article["body"] = $articleDom->find("div.entry-content")->html();
    $article["image"] = $articleDom->find("img")->attr("src");

    // The previous selector was "article:tag", which in CSS selector syntax
    // means an <article> element with a ":tag" pseudo-class, not an element
    // carrying a "content" attribute. The value is presumably meant to come
    // from <meta property="article:tag" content="..."> -- TODO confirm against
    // the live page markup.
    $article["category"] = $dom->find('meta[property="article:tag"]')->attr("content");

    $article["url"] = $url;

    return $article;
}
/**
 * Fetch a URL with cURL and return the raw response body.
 *
 * Redirects are followed, compressed responses are decoded, and the request
 * is sent with a fixed set of browser-like headers.
 *
 * @param string $url Address to request.
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function connect($url) {
    $curl = curl_init();
    if ($curl === false) {
        // No handle means nothing to configure or execute.
        return false;
    }

    // Optional browser-like request headers (originally modelled on the ones
    // sent by Firefox 2.0.0.6).
    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection: keep-alive",
        "Keep-Alive: 900",
        "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
        "Accept-Language: en-us,en;q=0.5",
        "Pragma: ", // browsers keep this blank.
    );

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Quareos/2.1 (+http://agent.quareos.com/)');
    curl_setopt($curl, CURLOPT_COOKIEFILE, '/var/www/vhosts/infoarkiva.com/httpdocs/v1/config/korriericookie.txt');
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_REFERER, 'http://www.quareos.com');
    curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT, 290);

    $html = curl_exec($curl);

    // Debugging hook: curl_getinfo($curl) (e.g. its 'http_code' entry) can be
    // inspected at this point, before the handle is released.

    // curl_close() does nothing from PHP 8.0 onwards, so only call it on the
    // older runtimes where it actually releases the handle.
    if (PHP_VERSION_ID < 80000) {
        curl_close($curl);
    }

    return $html;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment