Created
May 7, 2020 00:00
-
-
Save willboudle/a4b5a43fe41e70bf6c602410c6c37d27 to your computer and use it in GitHub Desktop.
PHP convert XML sitemap to HTML sitemap.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Opening markup of the generated sitemap page. It is concatenated with $map
// and the closing footer when the result is written out at the end of the script.
$header = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>HTML Sitemap</title>
</head>
<body>';

// Raise the execution time limit to 400 seconds for this run.
set_time_limit(400);

// Parser state shared (via `global`) between the XML handler functions:
// the name of the element currently open, and the text collected for <loc>.
$currentElement = '';
$currentLoc = '';

// Body of the generated page; entries are appended as the sitemap is parsed.
$map = "<h1>HTML Sitemap</h1>"."\n";
/**
 * Append an HTML sitemap entry for one URL to the global $map buffer.
 *
 * URLs containing ".pdf" or ".txt" get a fixed generic entry and are not
 * fetched. Any other URL is opened with fopen(); when that succeeds, the
 * page's <title>, first <h2> and meta description are extracted and written
 * as a link paragraph plus an optional description paragraph. URLs that
 * cannot be opened, or pages with neither a title nor an <h2>, produce no
 * link entry.
 *
 * If you want to trap other file extensions, add further checks of the form
 * stripos($data, ".php") > 0, stripos($data, ".htm") > 0, etc.
 *
 * @param string $data URL (or path) taken from a <loc> element of the sitemap.
 * @return void
 */
function parsePage($data)
{
    global $map;

    // The generated page is declared as iso-8859-1, where every byte is a
    // valid character, so escaping in that charset never rejects the input.
    $href = htmlspecialchars($data, ENT_QUOTES, 'ISO-8859-1');

    if (stripos($data, '.pdf') > 0) {
        // The URL is a pdf document.
        $map .= '<p><a href="'.$href.'">PDF document.</a></p>'."\n";
        $map .= '<p>A pdf document.</p>'."\n";
        return;
    }

    if (stripos($data, '.txt') > 0) {
        // The URL is a text document.
        $map .= '<p><a href="'.$href.'">Text document.</a></p>'."\n";
        $map .= '<p>A text document.</p>'."\n";
        return;
    }

    // Try to open anything else. Unreachable URLs are skipped on purpose,
    // so the warning fopen() would raise is suppressed.
    $urlh = @fopen($data, 'rb');
    if (!$urlh) {
        return;
    }
    $contents = stream_get_contents($urlh);
    fclose($urlh);
    if ($contents === false) {
        return;
    }

    // Find the title. The quantifier is lazy so the match stops at the first
    // closing tag; /s lets a title span several lines.
    $title = '';
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $contents, $match)) {
        $title = trim(preg_replace('/\s+/', ' ', $match[1]));
    }

    // Find the first h2 tag and keep only its text.
    // NOTE(review): the original comment said "h1" but the pattern has always
    // matched h2 - confirm which heading level is wanted.
    $heading = '';
    if (preg_match('/<h2\b[^>]*>(.*?)<\/h2>/is', $contents, $match)) {
        $heading = trim(preg_replace('/\s+/', ' ', strip_tags($match[1])));
    }

    // The extracted text comes from HTML source, so entities that are already
    // encoded are left alone (double_encode = false) while bare quotes,
    // ampersands and angle brackets are escaped.
    $titleHtml = htmlspecialchars($title, ENT_QUOTES, 'ISO-8859-1', false);
    $headingHtml = htmlspecialchars($heading, ENT_QUOTES, 'ISO-8859-1', false);

    if ($title !== '' && $heading !== '') {
        // Title and heading in combination; the heading doubles as tooltip.
        $map .= '<p class="link"><a href="'.$href.'" title="'.$headingHtml.'">'.$titleHtml.' - '.$headingHtml.'</a></p>'."\n";
    } elseif ($title !== '') {
        $map .= '<p class="link"><a href="'.$href.'" title="'.$titleHtml.'">'.$titleHtml.'</a></p>'."\n";
    } elseif ($heading !== '') {
        $map .= '<p class="link"><a href="'.$href.'" title="'.$headingHtml.'">'.$headingHtml.'</a></p>'."\n";
    }

    // Find the meta description; [^"]* keeps the match inside the attribute.
    if (preg_match('/<meta\s+name="description"\s+content="([^"]*)"/i', $contents, $match)) {
        $description = trim($match[1]);
        if ($description !== '') {
            $map .= '<p class="desc">'.htmlspecialchars($description, ENT_QUOTES, 'ISO-8859-1', false).'</p>'."\n";
        }
    }
}
/////////// XML PARSE FUNCTIONS HERE ///////////// | |
// the start element function | |
/**
 * Start-element handler: records the name of the element that was just
 * opened in the global $currentElement.
 *
 * @param mixed  $xmlParser Parser passed in by the XML extension (unused).
 * @param string $name      Name of the element being opened.
 * @param array  $attribs   Attributes of the element (unused).
 * @return void
 */
function startElement($xmlParser, $name, $attribs)
{
    global $currentElement;

    $currentElement = $name;
}
// the end element function | |
/**
 * End-element handler: when the element being tracked is <loc>, hands the
 * collected URL to parsePage() and resets the parser state.
 *
 * The collected text is trimmed first, because whitespace inside <loc> is
 * delivered as character data too. An empty result is skipped instead of
 * being passed to parsePage().
 *
 * @param mixed  $parser Parser passed in by the XML extension (unused).
 * @param string $name   Name of the element being closed (unused; the
 *                       decision is based on $currentElement).
 * @return void
 */
function endElement($parser, $name)
{
    global $currentElement, $currentLoc;

    if ($currentElement == 'loc') {
        $url = trim($currentLoc);
        $currentLoc = '';
        if ($url !== '') {
            parsePage($url);
        }
    }

    $currentElement = '';
}
// the character data function | |
/**
 * Character-data handler: while the current element is <loc>, appends the
 * received text to the global $currentLoc. Text is appended rather than
 * assigned so that content delivered over several calls is joined.
 *
 * @param mixed  $parser Parser passed in by the XML extension (unused).
 * @param string $data   Chunk of character data.
 * @return void
 */
function characterData($parser, $data)
{
    global $currentElement, $currentLoc;

    // If the current element is loc then the text is (part of) a URL.
    if ($currentElement == 'loc') {
        $currentLoc .= $data;
    }
}
// Driver: parse sitemap.xml with the handlers above, then write the collected
// HTML to sitemap.html and echo it.

// create parse object
$xml_parser = xml_parser_create();
// turn off case folding so element names keep their case ("loc", not "LOC")
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
// set start and end element functions
xml_set_element_handler($xml_parser, "startElement", "endElement");
// set character data function
xml_set_character_data_handler($xml_parser, "characterData");

// open xml file
if ( !($fp = fopen('sitemap.xml', "r")) ) {
    die("could not open XML input");
}

// Read the file in chunks - print error if something went wrong.
// The loop is driven by feof() rather than by the truthiness of the chunk,
// so a chunk such as "0" does not end it early, and the parser always
// receives a call with is_final = true, even when the file size is an exact
// multiple of the chunk size.
do {
    $data = fread($fp, 4096);
    if ($data === false) {
        fclose($fp);
        die("could not read XML input");
    }
    $isFinal = feof($fp);
    if ( !xml_parse($xml_parser, $data, $isFinal) ) {
        die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
    }
} while ( !$isFinal );

// close file
fclose($fp);

$footer = '</body>
</html>';
$html = $header.$map.$footer;

// Write output to a file. A failure to open the file must not prevent the
// page from being printed below, so the write is simply skipped.
$out = fopen('sitemap.html', "w+");
if ($out) {
    fwrite($out, $html);
    fclose($out);
}

// print output
echo $html;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment