Last active
April 10, 2017 20:43
-
-
Save bulton-fr/b61f2c8f89429dd109a86c9b559c1a1a to your computer and use it in GitHub Desktop.
del.icio.us Export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download every page of a del.icio.us account in parallel.
# Page N is saved as ./pages/N.html (relative to the current directory).
# Exits with status 1 if the output directory cannot be created or if at
# least one download fails.

NB_PAGES=28 # Replace with the number of pages of your account
ALL_PIDS=""

# Replace "yourValueHere" with your cookie values
COOKIES="loginemail=\"yourValueHere\";delavid=yourValueHere;H1:yourValueHere=1;sm_dapi_session_yourValueHere=1;delvisitor=\"yourValueHere\";session=yourValueHere"
AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36"

# curl -o does not create missing directories, so make sure the target exists.
OUTPUT_DIR="$(pwd)/pages"
mkdir -p "$OUTPUT_DIR" || exit 1

for PAGE in $(seq 1 "$NB_PAGES")
do
    OUTPUT="${OUTPUT_DIR}/${PAGE}.html"

    # Replace "yourLoginHere" with your login.
    # --fail: treat HTTP errors as failures instead of saving the error page.
    # --silent --show-error: no progress meters (they interleave when run in
    # parallel), but error messages are still printed.
    curl --fail --silent --show-error \
        -b "$COOKIES" -A "$AGENT" -o "$OUTPUT" \
        "https://del.icio.us/yourLoginHere?&page=${PAGE}" &

    # Remember the PID of the background job so it can be awaited below.
    ALL_PIDS="$ALL_PIDS $!"
done

# Wait for each download and count the ones that exited with an error.
FAILED=0
for PID in $ALL_PIDS
do
    wait "$PID" || FAILED=$((FAILED + 1))
done

if [ "$FAILED" -gt 0 ]; then
    echo "Finished with ${FAILED} failed download(s)." >&2
    exit 1
fi

echo "Finished."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

// The HTML served by Delicious is not valid markup. Ask libxml to keep its
// parse errors in its internal buffer instead of emitting them as warnings.
libxml_use_internal_errors(true);
/**
 * Parses one saved Delicious HTML page and extracts the bookmarks it lists.
 */
class Parser
{
    /**
     * @var \DOMDocument $dom DOMDocument instance holding the parsed html file
     */
    protected $dom;

    /**
     * @var \DOMXPath $xpath Used to run XPath queries against the DOMDocument
     */
    protected $xpath;

    /**
     * Constructor
     * Init $dom and $xpath properties
     *
     * @param string $file The html file to parse
     *
     * @throws \InvalidArgumentException If $file does not resolve to an
     *                                   existing path
     */
    public function __construct($file)
    {
        // realpath() returns false for a missing file; passing that on to
        // loadHTMLFile() would only produce an obscure "empty path" error.
        $path = realpath($file);
        if ($path === false) {
            throw new \InvalidArgumentException('HTML file not found: '.$file);
        }

        $this->dom = new \DOMDocument;
        $this->dom->loadHTMLFile($path);

        $this->xpath = new \DOMXPath($this->dom);
    }

    /**
     * Find all bookmarks in the html file
     *
     * Every <div> whose class attribute contains "articleThumbBlockOuter"
     * is treated as one bookmark.
     *
     * @return \Generator Yields one bookmark object per matching node,
     *                    with sequential integer keys
     */
    public function findBookmarks()
    {
        $bookmarks = $this->xpath->query(
            '//div[contains(@class, "articleThumbBlockOuter")]'
        );

        foreach ($bookmarks as $bookmark) {
            // Re-yield each value instead of using "yield from": the inner
            // generator always uses key 0, and "yield from" keeps inner keys,
            // so iterator_to_array() would retain only the last bookmark.
            foreach ($this->parseBookmark($bookmark) as $parsed) {
                yield $parsed;
            }
        }
    }

    /**
     * Parse a bookmark node and yield a single object describing it
     *
     * The yielded object has the properties id, date, title, link and tags
     * (tags is itself a lazy generator, see obtainTags()).
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \Generator
     */
    protected function parseBookmark($bookmark)
    {
        yield (object) [
            'id'    => $this->obtainId($bookmark),
            'date'  => $this->obtainDate($bookmark),
            'title' => $this->obtainTitle($bookmark),
            'link'  => $this->obtainLink($bookmark),
            'tags'  => $this->obtainTags($bookmark)
        ];
    }

    /**
     * Obtain the delicious id of a bookmark (its "md5" attribute)
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string Empty string when the attribute is absent
     */
    protected function obtainId($bookmark)
    {
        return $bookmark->getAttribute('md5');
    }

    /**
     * Obtain the date the bookmark was added in delicious
     *
     * The "date" attribute is parsed as a Unix timestamp (format "U").
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \DateTime|false False when the attribute is missing or is
     *                         not a valid timestamp
     */
    protected function obtainDate($bookmark)
    {
        $date = $bookmark->getAttribute('date');

        return \DateTime::createFromFormat('U', $date);
    }

    /**
     * Obtain the page title of a bookmark
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string Empty string when the title link is not found
     */
    protected function obtainTitle($bookmark)
    {
        return $this->firstAttribute(
            './/div[contains(@class, "articleTitlePan")]//a',
            $bookmark,
            'title'
        );
    }

    /**
     * Obtain the page link of a bookmark
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return string Empty string when the link is not found
     */
    protected function obtainLink($bookmark)
    {
        return $this->firstAttribute(
            './/div[contains(@class, "articleInfoPan")]/p[1]/a',
            $bookmark,
            'href'
        );
    }

    /**
     * Obtain the delicious tags of a bookmark
     *
     * @param \DOMElement $bookmark The bookmark to parse
     *
     * @return \Generator Yields each tag name as a string; the generator
     *                    can only be iterated once
     */
    protected function obtainTags($bookmark)
    {
        $nodeTags = $this->xpath->query('.//ul[contains(@class, "tagName")]/li', $bookmark);

        foreach ($nodeTags as $tag) {
            $anchor = $this->xpath->query('.//a', $tag)->item(0);

            // Skip list items without a link rather than dereferencing null.
            if ($anchor === null) {
                continue;
            }

            yield $anchor->nodeValue;
        }
    }

    /**
     * Return an attribute of the first node matched by an XPath query
     *
     * @param string   $query     XPath expression, evaluated relative to $context
     * @param \DOMNode $context   Node the query is evaluated against
     * @param string   $attribute Name of the attribute to read
     *
     * @return string Empty string when no element matches the query
     */
    private function firstAttribute($query, $context, $attribute)
    {
        $nodes = $this->xpath->query($query, $context);
        $node  = ($nodes === false) ? null : $nodes->item(0);

        if (!($node instanceof \DOMElement)) {
            return '';
        }

        return $node->getAttribute($attribute);
    }
}
// Get the list of downloaded pages.
// glob() may return false on error, and sorts alphabetically (1, 10, 11, ..., 2);
// sort naturally so pages are processed in numeric order.
$pathPages = __DIR__.'/pages/';
$lstPages  = glob($pathPages.'*.html') ?: [];
sort($lstPages, SORT_NATURAL);

// List of bookmarks and set of distinct tag names (tag name => true).
// This is only an example: collecting everything into arrays defeats the
// purpose of the generators.
$lstBookmarks = [];
$lstTags      = [];

foreach ($lstPages as $pagePath) {
    $parser = new Parser($pagePath);

    foreach ($parser->findBookmarks() as $bookmark) {
        // "tags" is a one-shot generator: materialise it once so the names
        // can be both indexed below and still be visible in the final dump.
        $bookmark->tags = iterator_to_array($bookmark->tags, false);
        $lstBookmarks[] = $bookmark;

        foreach ($bookmark->tags as $tagName) {
            // Array keys act as a set; assigning an existing key is a no-op.
            $lstTags[$tagName] = true;
        }
    }

    // Internal libxml error buffering is enabled at the top of this file;
    // empty the buffer after each page so it does not grow without bound.
    libxml_clear_errors();
}

var_dump($lstBookmarks);
var_dump($lstTags);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment