Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save willboudle/a4b5a43fe41e70bf6c602410c6c37d27 to your computer and use it in GitHub Desktop.
Save willboudle/a4b5a43fe41e70bf6c602410c6c37d27 to your computer and use it in GitHub Desktop.
PHP convert XML sitemap to HTML sitemap.
<?php
// Give the crawl plenty of time: every sitemap URL is fetched over the network.
set_time_limit(400);

// Static XHTML preamble written ahead of the generated sitemap entries.
$header = implode("\n", array(
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
    '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">',
    '<head>',
    '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />',
    '<title>HTML Sitemap</title>',
    '</head>',
    '<body>',
));

// Accumulated HTML body; parsePage() appends one entry per sitemap URL.
$map = "<h1>HTML Sitemap</h1>\n";

// Parser state shared by the XML callbacks:
// the name of the element currently open, and the URL text collected for <loc>.
$currentLoc = '';
$currentElement = '';
/**
 * Appends the HTML sitemap entry for one sitemap URL to the global $map.
 *
 * URLs containing ".pdf" or ".txt" are linked with a fixed label and are not
 * fetched. Any other URL is opened and read; the page's <title>, first <h2>
 * and meta description (when present) become the link text, the link tooltip
 * and a summary paragraph. A URL that cannot be opened adds nothing, and a
 * page with neither a title nor an <h2> gets no link.
 *
 * NOTE(review): the URL is handed to fopen() as-is, so a sitemap from an
 * untrusted source could point at local files or internal hosts.
 *
 * @param string $data URL collected from a <loc> element.
 * @return void
 */
function parsePage($data)
{
    global $map;

    // The XML parser has already decoded entities, so the URL is raw text and
    // every special character in it must be escaped for the href attribute.
    $href = sitemapEscape($data, true);

    // To trap another file extension, add a branch using the same test, e.g.
    //   stripos($data, '.php') !== false
    //   stripos($data, '.htm') !== false
    //   stripos($data, '.asp') !== false
    if (stripos($data, '.pdf') !== false) {
        // The URL is a PDF document: nothing to scrape, link it with a fixed label.
        $map .= '<p><a href="' . $href . '">PDF document.</a></p>' . "\n";
        $map .= '<p>A pdf document.</p>' . "\n";
        return;
    }
    if (stripos($data, '.txt') !== false) {
        // The URL is a text document.
        $map .= '<p><a href="' . $href . '">Text document.</a></p>' . "\n";
        $map .= '<p>A text document.</p>' . "\n";
        return;
    }

    // Anything else: try to open it. Failure is deliberately silent (best
    // effort) - an unreachable page is simply left out of the sitemap.
    $urlh = @fopen($data, 'rb');
    if (!$urlh) {
        return;
    }

    // Feature-test instead of comparing the version string with a number.
    if (function_exists('stream_get_contents')) {
        $contents = stream_get_contents($urlh);
    } else {
        $contents = '';
        while (!feof($urlh)) {
            $chunk = fread($urlh, 8192);
            if ($chunk === false) {
                // A failed read never reaches EOF; stop instead of looping forever.
                break;
            }
            $contents .= $chunk;
        }
    }
    fclose($urlh);

    if (!is_string($contents) || $contents === '') {
        return;
    }

    // The extracted fragments come from HTML source and may already contain
    // entities, so they are escaped without double-encoding.
    $title = sitemapEscape(
        sitemapFirstMatch('/<title\b[^>]*>(.*?)<\/title>/is', $contents)
    );
    // First <h2> on the page (markup inside the heading is stripped).
    $heading = sitemapEscape(
        sitemapFirstMatch('/<h2\b[^>]*>(.*?)<\/h2>/is', $contents)
    );
    $description = sitemapEscape(
        sitemapFirstMatch('/<meta\s+name="description"\s+content="([^"]*)"\s*\/?>/i', $contents)
    );

    if ($title !== '' && $heading !== '') {
        // Title and heading in combination; the heading doubles as the tooltip.
        $map .= '<p class="link"><a href="' . $href . '" title="' . $heading . '">'
            . $title . ' - ' . $heading . '</a></p>' . "\n";
    } elseif ($title !== '' || $heading !== '') {
        $label = ($title !== '') ? $title : $heading;
        $map .= '<p class="link"><a href="' . $href . '" title="' . $label . '">'
            . $label . '</a></p>' . "\n";
    }

    if ($description !== '') {
        $map .= '<p class="desc">' . $description . '</p>' . "\n";
    }
}

/**
 * Returns the first capture group of $pattern in $contents as plain text:
 * tags stripped, runs of whitespace collapsed, ends trimmed. Returns '' when
 * the pattern does not match.
 */
function sitemapFirstMatch($pattern, $contents)
{
    $matches = array();
    if (preg_match($pattern, $contents, $matches) !== 1 || !isset($matches[1])) {
        return '';
    }
    return trim((string) preg_replace('/\s+/', ' ', strip_tags($matches[1])));
}

/**
 * Escapes text for use in HTML content or a quoted attribute.
 *
 * A single-byte charset is named explicitly (it matches the charset the
 * generated page declares) so bytes from pages in any ASCII-compatible
 * encoding pass through untouched instead of being rejected as invalid.
 *
 * @param string $text         Text to escape.
 * @param bool   $doubleEncode True for raw text such as URLs; false for
 *                             fragments that may already contain entities.
 */
function sitemapEscape($text, $doubleEncode = false)
{
    return htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1', $doubleEncode);
}
/////////// XML PARSE FUNCTIONS HERE /////////////
/**
 * Element-start callback registered with the XML parser.
 *
 * Records the name of the tag that was just opened in the global
 * $currentElement so the other callbacks know which element they are in.
 * The parser handle and the attribute list are not used.
 */
function startElement($xmlParser, $name, $attribs)
{
    $GLOBALS['currentElement'] = $name;
}
/**
 * Element-end callback registered with the XML parser.
 *
 * When the element being tracked is <loc>, the URL gathered so far is passed
 * to parsePage() and the URL buffer is emptied. The tracked element name is
 * cleared in every case.
 */
function endElement($parser, $name)
{
    global $currentElement, $currentLoc;

    $finishedLoc = ($currentElement == 'loc');
    if ($finishedLoc) {
        parsePage($currentLoc);
        $currentLoc = '';
    }

    $currentElement = '';
}
/**
 * Character-data callback registered with the XML parser.
 *
 * Text is appended to the global $currentLoc only while the tracked element
 * is <loc>; text belonging to any other element is ignored. Appending (rather
 * than assigning) matters because the parser may deliver one text node in
 * several pieces.
 */
function characterData($parser, $data)
{
    global $currentElement, $currentLoc;

    if ($currentElement != 'loc') {
        return;
    }

    $currentLoc = $currentLoc . $data;
}
// Create the event-based XML parser and wire up the callbacks defined above.
$xml_parser = xml_parser_create();
// Turn off case folding so element names arrive as written ("loc", not "LOC").
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
xml_set_element_handler($xml_parser, "startElement", "endElement");
xml_set_character_data_handler($xml_parser, "characterData");

// Open the XML sitemap that sits next to this script.
$fp = fopen('sitemap.xml', "r");
if (!$fp) {
    die("could not open XML input");
}

// Feed the file to the parser in chunks. Looping on feof() instead of on the
// truthiness of the chunk means:
//  - a chunk that happens to be the string "0" does not end the loop early;
//  - the parser always receives a final call with is_final = true, even when
//    the file size is an exact multiple of the chunk size, so truncated or
//    empty input is reported instead of being silently accepted.
while (!feof($fp)) {
    $data = fread($fp, 4096);
    if ($data === false) {
        die("could not read XML input");
    }
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
    }
}
fclose($fp);

$footer = '</body>
</html>';
$html = $header . $map . $footer;

// Write the result to a file. A write failure is reported but does not stop
// the page from being printed below.
$out = fopen('sitemap.html', "w+");
if ($out) {
    if (fwrite($out, $html) === false) {
        trigger_error('could not write sitemap.html', E_USER_WARNING);
    }
    fclose($out);
} else {
    trigger_error('could not open sitemap.html for writing', E_USER_WARNING);
}

// Print the same output to the client.
echo $html;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment