Skip to content

Instantly share code, notes, and snippets.

@bulton-fr
Last active April 10, 2017 20:43
Show Gist options
  • Save bulton-fr/b61f2c8f89429dd109a86c9b559c1a1a to your computer and use it in GitHub Desktop.
Save bulton-fr/b61f2c8f89429dd109a86c9b559c1a1a to your computer and use it in GitHub Desktop.
del.icio.us Export
#!/bin/bash
# Download every listing page of a del.icio.us account into ./pages/<n>.html.
# One curl process is started in the background per page; the script then
# waits for all of them and reports how many failed.

NB_PAGES=28 # Replace with the number of pages for your account
ALL_PIDS=""

# Replace "yourValueHere" by your cookie value
COOKIES="loginemail=\"yourValueHere\";delavid=yourValueHere;H1:yourValueHere=1;sm_dapi_session_yourValueHere=1;delvisitor=\"yourValueHere\";session=yourValueHere"
AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36"

# curl -o does not create missing directories, so make sure the target exists
# before any download starts.
OUTPUT_DIR="$(pwd)/pages"
mkdir -p "$OUTPUT_DIR" || exit 1

for PAGE in $(seq 1 "$NB_PAGES")
do
    OUTPUT="${OUTPUT_DIR}/${PAGE}.html"
    # Replace "yourLoginHere" by your login.
    curl -b "$COOKIES" -A "$AGENT" -o "$OUTPUT" "https://del.icio.us/yourLoginHere?&page=${PAGE}" &
    # Remember the PID of the background curl so it can be waited on below.
    ALL_PIDS="$ALL_PIDS $!"
done

# Wait for every download; `wait PID` returns that process's exit status,
# which lets us count the transfers curl reported as failed.
FAILED=0
for PID in $ALL_PIDS
do
    wait "$PID" || FAILED=$((FAILED + 1))
done

if [ "$FAILED" -gt 0 ]
then
    echo "Warning: ${FAILED} of ${NB_PAGES} download(s) failed." >&2
fi
echo "Finished."
<?php
// Collect libxml parse errors internally instead of emitting PHP warnings:
// the Delicious HTML code is not valid markup.
libxml_use_internal_errors(true);
/**
 * Parse the DOM of one saved del.icio.us listing page to obtain its bookmarks.
 *
 * Bookmarks are located with XPath queries and exposed as plain objects
 * (see parseBookmark() for the list of properties).
 */
class Parser
{
    /**
     * @var \DOMDocument $dom DOMDocument's instance for the parsed html file
     */
    protected $dom;

    /**
     * @var \DOMXPath $xpath Used to run XPath queries against the DOMDocument
     */
    protected $xpath;

    /**
     * Constructor
     * Init $dom and $xpath properties
     *
     * @param string $file The html file to parse
     *
     * @throws \InvalidArgumentException If the file path cannot be resolved
     */
    public function __construct($file)
    {
        // realpath() returns false for a missing/unreachable path; fail with
        // a clear message instead of handing false to loadHTMLFile().
        $path = realpath($file);
        if ($path === false) {
            throw new \InvalidArgumentException(
                sprintf('The html file "%s" cannot be resolved (missing or not accessible).', $file)
            );
        }

        $this->dom = new DOMDocument;
        $this->dom->loadHTMLFile($path);
        $this->xpath = new DomXPath($this->dom);
    }

    /**
     * Find all bookmarks into the html file
     *
     * Each bookmark is yielded with its own sequential key, so the result
     * can be safely passed to iterator_to_array().
     *
     * @return \Generator
     */
    public function findBookmarks()
    {
        $bookmarks = $this->xpath->query(
            '//div[contains(@class, "articleThumbBlockOuter")]'
        );

        foreach ($bookmarks as $bookmark) {
            // Re-yield instead of "yield from": "yield from" keeps the inner
            // generator's keys, and every parseBookmark() generator uses key 0,
            // which would make all bookmarks share the same key.
            foreach ($this->parseBookmark($bookmark) as $parsed) {
                yield $parsed;
            }
        }
    }

    /**
     * Parse a bookmark, obtain its information and return a generator
     * yielding a single object with the properties id, date, title, link
     * and tags.
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \Generator
     */
    protected function parseBookmark($bookmark)
    {
        yield (object) [
            'id' => $this->obtainId($bookmark),
            'date' => $this->obtainDate($bookmark),
            'title' => $this->obtainTitle($bookmark),
            'link' => $this->obtainLink($bookmark),
            'tags' => $this->obtainTags($bookmark)
        ];
    }

    /**
     * Obtain the delicious id for a bookmark (its "md5" attribute)
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string
     */
    protected function obtainId($bookmark)
    {
        return $bookmark->getAttribute('md5');
    }

    /**
     * Obtain the date the bookmark was added in delicious.
     *
     * The "date" attribute is read as a unix timestamp.
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \DateTime|false False when the attribute is not a valid timestamp
     */
    protected function obtainDate($bookmark)
    {
        $date = $bookmark->getAttribute('date');
        return \DateTime::createFromFormat('U', $date);
    }

    /**
     * Obtain the page title for a bookmark
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string The title, or an empty string when the title link is missing
     */
    protected function obtainTitle($bookmark)
    {
        return $this->firstMatchAttribute(
            './/div[contains(@class, "articleTitlePan")]//a',
            $bookmark,
            'title'
        );
    }

    /**
     * Obtain the page link for a bookmark
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string The url, or an empty string when the link is missing
     */
    protected function obtainLink($bookmark)
    {
        return $this->firstMatchAttribute(
            './/div[contains(@class, "articleInfoPan")]/p[1]/a',
            $bookmark,
            'href'
        );
    }

    /**
     * Obtain the delicious tags for a bookmark
     *
     * The generator can only be iterated once.
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \Generator
     */
    protected function obtainTags($bookmark)
    {
        $nodeTags = $this->xpath->query('.//ul[contains(@class, "tagName")]/li', $bookmark);

        foreach ($nodeTags as $tag) {
            $anchor = $this->xpath->query('.//a', $tag)->item(0);
            if ($anchor === null) {
                // A list item without a link carries no tag name; skip it
                // rather than reading a property on null.
                continue;
            }

            yield $anchor->nodeValue;
        }
    }

    /**
     * Read an attribute on the first element matched by an XPath query
     * evaluated relative to a bookmark.
     *
     * @param string      $query     The XPath expression
     * @param \DOMElement $bookmark  The context node for the expression
     * @param string      $attribute The attribute name to read
     *
     * @return string The attribute value, or an empty string when nothing
     *                matches (same value getAttribute() gives for a missing
     *                attribute)
     */
    private function firstMatchAttribute($query, $bookmark, $attribute)
    {
        $nodes = $this->xpath->query($query, $bookmark);
        $node = ($nodes === false) ? null : $nodes->item(0);

        if (!$node instanceof \DOMElement) {
            return '';
        }

        return $node->getAttribute($attribute);
    }
}
// Get the list of the downloaded pages.
$pathPages = __DIR__.'/pages/';
// glob() may return false on error; fall back to an empty list.
$lstPages = glob($pathPages.'*.html') ?: [];
// glob() sorts lexically (1, 10, 11, ..., 2); natural order keeps the pages,
// and therefore the bookmarks, in their numeric order.
natsort($lstPages);

// List of bookmarks and tags.
// This is only for the example: collecting everything into arrays after
// using the generators defeats their purpose.
$lstBookmarks = [];
$lstTags = [];

foreach ($lstPages as $pagePath) {
    $parser = new Parser($pagePath);

    foreach ($parser->findBookmarks() as $bookmark) {
        // The tags come as a one-shot generator: turn them into an array
        // before storing the bookmark, otherwise the stored object would
        // only hold an already consumed generator.
        $bookmark->tags = iterator_to_array($bookmark->tags, false);
        $lstBookmarks[] = $bookmark;

        foreach ($bookmark->tags as $tagName) {
            // Tags are used as array keys so each one is kept only once.
            $lstTags[$tagName] = true;
        }
    }

    // Drop the parse errors libxml buffered for this page so they do not
    // pile up in memory across pages.
    libxml_clear_errors();
}

var_dump($lstBookmarks);
var_dump($lstTags);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment