Last active
February 10, 2017 13:05
-
-
Save silvioq/6957a72f279ca7fb729a5ea685ffbcde to your computer and use it in GitHub Desktop.
Proyecto de prueba para obtener datos de una página
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vendor/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ID: 12170236

Installation:
    composer install

Usage:
    php crawler.php URL-to-process

Example:
    php crawler.php https://www.freelancer.com/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
    "require": {
        "fabpot/goutte": "^3.2"
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_readme": [ | |
"This file locks the dependencies of your project to a known state", | |
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", | |
"This file is @generated automatically" | |
], | |
"hash": "defc342ae76c7f0ce5b64dd4004facca", | |
"content-hash": "425aac93d170ece8a1f83c754fbc0f1d", | |
"packages": [ | |
{ | |
"name": "fabpot/goutte", | |
"version": "v3.2.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/FriendsOfPHP/Goutte.git", | |
"reference": "8cc89de5e71daf84051859616891d3320d88a9e8" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/FriendsOfPHP/Goutte/zipball/8cc89de5e71daf84051859616891d3320d88a9e8", | |
"reference": "8cc89de5e71daf84051859616891d3320d88a9e8", | |
"shasum": "" | |
}, | |
"require": { | |
"guzzlehttp/guzzle": "^6.0", | |
"php": ">=5.5.0", | |
"symfony/browser-kit": "~2.1|~3.0", | |
"symfony/css-selector": "~2.1|~3.0", | |
"symfony/dom-crawler": "~2.1|~3.0" | |
}, | |
"type": "application", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.2-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Goutte\\": "Goutte" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "fabien@symfony.com" | |
} | |
], | |
"description": "A simple PHP Web Scraper", | |
"homepage": "https://github.com/FriendsOfPHP/Goutte", | |
"keywords": [ | |
"scraper" | |
], | |
"time": "2016-11-15 16:27:29" | |
}, | |
{ | |
"name": "guzzlehttp/guzzle", | |
"version": "6.2.2", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/guzzle.git", | |
"reference": "ebf29dee597f02f09f4d5bbecc68230ea9b08f60" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/guzzle/zipball/ebf29dee597f02f09f4d5bbecc68230ea9b08f60", | |
"reference": "ebf29dee597f02f09f4d5bbecc68230ea9b08f60", | |
"shasum": "" | |
}, | |
"require": { | |
"guzzlehttp/promises": "^1.0", | |
"guzzlehttp/psr7": "^1.3.1", | |
"php": ">=5.5" | |
}, | |
"require-dev": { | |
"ext-curl": "*", | |
"phpunit/phpunit": "^4.0", | |
"psr/log": "^1.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "6.2-dev" | |
} | |
}, | |
"autoload": { | |
"files": [ | |
"src/functions_include.php" | |
], | |
"psr-4": { | |
"GuzzleHttp\\": "src/" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "mtdowling@gmail.com", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "Guzzle is a PHP HTTP client library", | |
"homepage": "http://guzzlephp.org/", | |
"keywords": [ | |
"client", | |
"curl", | |
"framework", | |
"http", | |
"http client", | |
"rest", | |
"web service" | |
], | |
"time": "2016-10-08 15:01:37" | |
}, | |
{ | |
"name": "guzzlehttp/promises", | |
"version": "1.3.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/promises.git", | |
"reference": "2693c101803ca78b27972d84081d027fca790a1e" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/promises/zipball/2693c101803ca78b27972d84081d027fca790a1e", | |
"reference": "2693c101803ca78b27972d84081d027fca790a1e", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.0" | |
}, | |
"require-dev": { | |
"phpunit/phpunit": "~4.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.0-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"GuzzleHttp\\Promise\\": "src/" | |
}, | |
"files": [ | |
"src/functions_include.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "mtdowling@gmail.com", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "Guzzle promises library", | |
"keywords": [ | |
"promise" | |
], | |
"time": "2016-11-18 17:47:58" | |
}, | |
{ | |
"name": "guzzlehttp/psr7", | |
"version": "1.3.1", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/guzzle/psr7.git", | |
"reference": "5c6447c9df362e8f8093bda8f5d8873fe5c7f65b" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/guzzle/psr7/zipball/5c6447c9df362e8f8093bda8f5d8873fe5c7f65b", | |
"reference": "5c6447c9df362e8f8093bda8f5d8873fe5c7f65b", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.4.0", | |
"psr/http-message": "~1.0" | |
}, | |
"provide": { | |
"psr/http-message-implementation": "1.0" | |
}, | |
"require-dev": { | |
"phpunit/phpunit": "~4.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.4-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"GuzzleHttp\\Psr7\\": "src/" | |
}, | |
"files": [ | |
"src/functions_include.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Michael Dowling", | |
"email": "mtdowling@gmail.com", | |
"homepage": "https://github.com/mtdowling" | |
} | |
], | |
"description": "PSR-7 message implementation", | |
"keywords": [ | |
"http", | |
"message", | |
"stream", | |
"uri" | |
], | |
"time": "2016-06-24 23:00:38" | |
}, | |
{ | |
"name": "psr/http-message", | |
"version": "1.0.1", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/php-fig/http-message.git", | |
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", | |
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.3.0" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.0.x-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Psr\\Http\\Message\\": "src/" | |
} | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "PHP-FIG", | |
"homepage": "http://www.php-fig.org/" | |
} | |
], | |
"description": "Common interface for HTTP messages", | |
"homepage": "https://github.com/php-fig/http-message", | |
"keywords": [ | |
"http", | |
"http-message", | |
"psr", | |
"psr-7", | |
"request", | |
"response" | |
], | |
"time": "2016-08-06 14:39:51" | |
}, | |
{ | |
"name": "symfony/browser-kit", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/browser-kit.git", | |
"reference": "901319a31c9b3cee7857b4aeeb81b5d64dfa34fc" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/browser-kit/zipball/901319a31c9b3cee7857b4aeeb81b5d64dfa34fc", | |
"reference": "901319a31c9b3cee7857b4aeeb81b5d64dfa34fc", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9", | |
"symfony/dom-crawler": "~2.8|~3.0" | |
}, | |
"require-dev": { | |
"symfony/css-selector": "~2.8|~3.0", | |
"symfony/process": "~2.8|~3.0" | |
}, | |
"suggest": { | |
"symfony/process": "" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\BrowserKit\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "fabien@symfony.com" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony BrowserKit Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-09-06 11:02:40" | |
}, | |
{ | |
"name": "symfony/css-selector", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/css-selector.git", | |
"reference": "a37b3359566415a91cba55a2d95820b3fa1a9658" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/css-selector/zipball/a37b3359566415a91cba55a2d95820b3fa1a9658", | |
"reference": "a37b3359566415a91cba55a2d95820b3fa1a9658", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\CssSelector\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Jean-François Simon", | |
"email": "jeanfrancois.simon@sensiolabs.com" | |
}, | |
{ | |
"name": "Fabien Potencier", | |
"email": "fabien@symfony.com" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony CssSelector Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-11-03 08:04:31" | |
}, | |
{ | |
"name": "symfony/dom-crawler", | |
"version": "v3.1.7", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/dom-crawler.git", | |
"reference": "1eb3b4d216e8db117218dd2bb7d23dfe67bdf518" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/dom-crawler/zipball/1eb3b4d216e8db117218dd2bb7d23dfe67bdf518", | |
"reference": "1eb3b4d216e8db117218dd2bb7d23dfe67bdf518", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.5.9", | |
"symfony/polyfill-mbstring": "~1.0" | |
}, | |
"require-dev": { | |
"symfony/css-selector": "~2.8|~3.0" | |
}, | |
"suggest": { | |
"symfony/css-selector": "" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "3.1-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Component\\DomCrawler\\": "" | |
}, | |
"exclude-from-classmap": [ | |
"/Tests/" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Fabien Potencier", | |
"email": "fabien@symfony.com" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony DomCrawler Component", | |
"homepage": "https://symfony.com", | |
"time": "2016-11-14 16:20:02" | |
}, | |
{ | |
"name": "symfony/polyfill-mbstring", | |
"version": "v1.3.0", | |
"source": { | |
"type": "git", | |
"url": "https://github.com/symfony/polyfill-mbstring.git", | |
"reference": "e79d363049d1c2128f133a2667e4f4190904f7f4" | |
}, | |
"dist": { | |
"type": "zip", | |
"url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/e79d363049d1c2128f133a2667e4f4190904f7f4", | |
"reference": "e79d363049d1c2128f133a2667e4f4190904f7f4", | |
"shasum": "" | |
}, | |
"require": { | |
"php": ">=5.3.3" | |
}, | |
"suggest": { | |
"ext-mbstring": "For best performance" | |
}, | |
"type": "library", | |
"extra": { | |
"branch-alias": { | |
"dev-master": "1.3-dev" | |
} | |
}, | |
"autoload": { | |
"psr-4": { | |
"Symfony\\Polyfill\\Mbstring\\": "" | |
}, | |
"files": [ | |
"bootstrap.php" | |
] | |
}, | |
"notification-url": "https://packagist.org/downloads/", | |
"license": [ | |
"MIT" | |
], | |
"authors": [ | |
{ | |
"name": "Nicolas Grekas", | |
"email": "p@tchwork.com" | |
}, | |
{ | |
"name": "Symfony Community", | |
"homepage": "https://symfony.com/contributors" | |
} | |
], | |
"description": "Symfony polyfill for the Mbstring extension", | |
"homepage": "https://symfony.com", | |
"keywords": [ | |
"compatibility", | |
"mbstring", | |
"polyfill", | |
"portable", | |
"shim" | |
], | |
"time": "2016-11-14 01:06:16" | |
} | |
], | |
"packages-dev": [], | |
"aliases": [], | |
"minimum-stability": "stable", | |
"stability-flags": [], | |
"prefer-stable": false, | |
"prefer-lowest": false, | |
"platform": [], | |
"platform-dev": [] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
# Command-line page inspector.
#
# Fetches a single URL with Goutte and prints:
#   - the page <title>,
#   - the length of the "description" and "keywords" meta tag contents,
#   - the time the request took, in milliseconds.
#
# Installation:
#   composer install
#
# Usage:
#   php crawler.php URL-to-process
#
# Example
#   php crawler.php https://www.freelancer.com/
#
# Exit status: 0 on success, 1 on bad usage or when the page cannot be fetched.

require "vendor/autoload.php";

# Exactly one argument (the URL) must follow the script name.
if( count( $argv ) != 2 )
{
  echo "Usage:\n php crawler.php URL-to-process\n\nExample:\n php crawler.php https://www.freelancer.com/\n\n";
  exit( 1 );
}

$client = new Goutte\Client;

try
{
  # The timer wraps the whole request() call.
  $start   = microtime(true);
  $crawler = $client->request('GET', $argv[1] );
  $end     = microtime(true);
}
catch( GuzzleHttp\Exception\ConnectException $e )
{
  print( "Can't connect to " . $argv[1] ."\nDetailed error:\n" );
  print $e->getMessage() . PHP_EOL;
  exit( 1 );
}
catch( GuzzleHttp\Exception\TransferException $e )
{
  # Any other Guzzle transfer failure would otherwise escape as an uncaught
  # exception and dump a stack trace instead of a readable message.
  print( "Can't fetch " . $argv[1] ."\nDetailed error:\n" );
  print $e->getMessage() . PHP_EOL;
  exit( 1 );
}

# Defaults that are reported when the page lacks the corresponding element.
$result = [
  "title"       => 'No title',
  "description" => 'No metatag description',
  "keywords"    => 'No metatag keywords',
  "time"        => sprintf("%d ms." , ($end - $start)*1000),
];

$crawler->filter("head > meta")->each(function($node) use( &$result ){
  # attr() yields null when the attribute is missing (e.g. <meta charset="...">),
  # so cast before handing the value to string functions.
  $name = strtolower( (string) $node->attr('name') );

  if( $name === 'description' || $name === 'keywords' )
  {
    # Stores the length of the content, not the content itself.
    # NOTE(review): strlen() counts bytes; confirm whether a character count
    # (mb_strlen) is what the report should show for non-ASCII pages.
    $result[$name] = strlen( (string) $node->attr('content') );
  }
});

# If several <title> elements exist inside <head>, the last one wins.
$crawler->filter("head > title")->each(function($node) use (&$result){
  $result['title'] = $node->text();
});

print_r( $result );
exit( 0 );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment