|
<?php |
|
/* |
|
* Markdown Parser |
|
* Version 0.1.0 |
|
* By Sam Thompson <http://openflame-project.org/> |
|
* |
|
* This code has hereby been released into the public domain |
|
* There's absolutely no warranty. |
|
* |
|
* This class works as a parser for Markdown, placing the data in an array
* that can be output or further manipulated as needed.
|
*/ |
|
|
|
class MarkdownParser
{
    /*
     * @var string Raw Markdown text, exactly as handed to the constructor
     */
    public $raw = '';

    /*
     * @var array Parsed blocks in document order. Each entry is an array with
     *            a 'type' key (one of the TYPE_* constants) plus:
     *              - 'text'          for headings, paragraphs and blockquotes
     *              - 'items'         for lists (array of item strings)
     *              - 'lang', 'text'  for fenced pre/code blocks
     */
    public $parsedText = array();

    /*
     * @var array Link definitions of the form "[12]: target", keyed by the
     *            integer id
     */
    public $linkList = array();

    /*
     * Constructor
     *
     * @param mixed $raw Markdown source; cast to string
     */
    public function __construct($raw)
    {
        $this->raw = (string) $raw;
    }

    // Location flags: which kind of block parse() is currently collecting.
    // Only one is active at a time; 0 means "no open block".
    const IN_PARAGRAPH = 1;
    const IN_LIST = 2;
    const IN_PREFORMAT = 4;
    const IN_BLOCKQUOTE = 8;

    // Types: values stored under the 'type' key of each parsed block
    const TYPE_H1 = 'h1';
    const TYPE_H2 = 'h2';
    const TYPE_H3 = 'h3';
    const TYPE_H4 = 'h4';
    const TYPE_H5 = 'h5';
    const TYPE_H6 = 'h6';
    const TYPE_UL = 'ul';
    const TYPE_OL = 'ol';
    const TYPE_P = 'p';
    const TYPE_BQ = 'blockquote';
    const TYPE_PRE = 'pre';
    const TYPE_CODE = 'code';

    /*
     * Parse
     *
     * Walks $this->raw line by line and appends one entry per block to
     * $this->parsedText; link definitions go to $this->linkList instead.
     * A block is closed by a blank line, by the start of a different kind
     * of block, or by the end of the input, so nothing is dropped when the
     * text does not end with an empty line.
     *
     * Inline elements (emphasis, links, ...) are not interpreted.
     *
     * @return void
     */
    public function parse()
    {
        // Get rid of some annoyances then create an array
        $raw = str_replace(array("\r", "\t"), array("", " "), $this->raw);
        $lines = explode("\n", $raw);
        $total = count($lines);

        // State of the block currently being collected
        $flag = 0;          // one of the IN_* constants, 0 when idle
        $buffer = '';       // text so far, or the block type for lists/fences
        $items = array();   // list items, or the lines of a fenced block
        $lang = '';         // language tag of the open fence

        for ($k = 0; $k < $total; $k++)
        {
            $line = $lines[$k];
            $isFence = (strncmp($line, '```', 3) === 0);

            // Code/preformatted is always first: inside a fence nothing
            // else is interpreted
            if ($flag === self::IN_PREFORMAT)
            {
                if ($isFence)
                {
                    // Close the pre
                    $this->flushBlock($flag, $buffer, $items, $lang);
                }
                else
                {
                    // Append to pre
                    $items[] = $line;
                }

                continue;
            }

            if ($isFence)
            {
                // Open the pre; whatever was pending ends here so it is
                // neither lost nor mixed into the code lines
                $this->flushBlock($flag, $buffer, $items, $lang);

                $flag = self::IN_PREFORMAT;
                $lang = (string) substr(trim($line), 3);
                $buffer = ($lang === '') ? self::TYPE_PRE : self::TYPE_CODE;
                continue;
            }

            // A blank (or whitespace-only) line closes the open block
            if (trim($line) === '')
            {
                $this->flushBlock($flag, $buffer, $items, $lang);
                continue;
            }

            // Two line header: this line is the title when the next line is
            // an underline of the same length. Looking ahead (instead of
            // back) keeps the title out of the paragraph buffer.
            $next = ($k + 1 < $total) ? $lines[$k + 1] : null;

            if ($next !== null && strlen($next) === strlen($line) && $this->isHeader($next))
            {
                $this->flushBlock($flag, $buffer, $items, $lang);

                $type = ($next[0] === '=') ? $this->getHeading(1) : $this->getHeading(2);
                $this->parsedText[] = array('type' => $type, 'text' => $line);

                $k++; // the underline has been consumed
                continue;
            }

            // Is it an in-line header?
            if ($line[0] === '#')
            {
                $this->flushBlock($flag, $buffer, $items, $lang);

                // At most six leading hashes count towards the level
                $level = min(strspn($line, '#'), 6);

                // Drop the leading marker and an optional closing run of
                // hashes; hashes inside the title are left alone
                $text = trim((string) substr($line, $level));
                $text = preg_replace('/(?:^|\s+)#+$/D', '', $text);

                $this->parsedText[] = array('type' => $this->getHeading($level), 'text' => $text);
                continue;
            }

            // Lists (nasty regexp, kept as-is: it also accepts bare numbers
            // and parenthesised markers)
            if (preg_match("#^[\s]*[\(]*([0-9]+|\*|\-)[\)\.]*[\s]*([a-z0-9\[\]\(\)\,\.]{1}[a-z0-9\[\]\(\)\,\.\s]*)$#i", $line, $matches))
            {
                if ($flag !== self::IN_LIST)
                {
                    // The first item decides the list type
                    $this->flushBlock($flag, $buffer, $items, $lang);

                    $flag = self::IN_LIST;
                    $buffer = (is_numeric($matches[1])) ? self::TYPE_OL : self::TYPE_UL;
                }

                $items[] = $matches[2];
                continue;
            }

            // Blockquote
            if ($line[0] === '>')
            {
                if ($flag !== self::IN_BLOCKQUOTE)
                {
                    $this->flushBlock($flag, $buffer, $items, $lang);
                    $flag = self::IN_BLOCKQUOTE;
                }

                $buffer .= trim((string) substr($line, 1)) . ' ';
                continue;
            }

            // Linklists: recorded on the side, the open block is untouched
            if (preg_match("#^\[([0-9]+)\]: (.*)$#", $line, $matches) == 1)
            {
                $this->linkList[(int) $matches[1]] = trim($matches[2]);
                continue;
            }

            // Normal paragraph: every remaining non-blank line is text
            if ($flag !== self::IN_PARAGRAPH)
            {
                $this->flushBlock($flag, $buffer, $items, $lang);
                $flag = self::IN_PARAGRAPH;
            }

            $buffer .= trim($line) . ' ';
        }

        // Emit whatever is still open at the end of the input (including an
        // unterminated fence)
        $this->flushBlock($flag, $buffer, $items, $lang);
    }

    /*
     * Appends the block described by the current parser state to
     * $this->parsedText (nothing is appended when no block is open) and
     * resets the state to "idle".
     *
     * All arguments are taken by reference because they are the caller's
     * working state and are cleared here.
     *
     * @param int    $flag   one of the IN_* constants, or 0
     * @param string $buffer collected text, or the block type for lists/fences
     * @param array  $items  list items or fenced lines
     * @param string $lang   language tag of a fenced block
     * @return void
     */
    private function flushBlock(&$flag, &$buffer, array &$items, &$lang)
    {
        switch ($flag)
        {
            case self::IN_PARAGRAPH:
                // Lines are joined with a single space; drop the last one
                $this->parsedText[] = array('type' => self::TYPE_P, 'text' => rtrim($buffer));
                break;

            case self::IN_BLOCKQUOTE:
                $this->parsedText[] = array('type' => self::TYPE_BQ, 'text' => rtrim($buffer));
                break;

            case self::IN_LIST:
                $this->parsedText[] = array('type' => $buffer, 'items' => $items);
                break;

            case self::IN_PREFORMAT:
                $this->parsedText[] = array('type' => $buffer, 'lang' => $lang, 'text' => implode("\n", $items));
                break;
        }

        $flag = 0;
        $buffer = $lang = '';
        $items = array();
    }

    /*
     * Checks whether a line is a header underline: one or more '='
     * characters only, or one or more '-' characters only. An empty line
     * is not an underline.
     *
     * @param string $ln
     * @return bool
     */
    private function isHeader($ln)
    {
        return is_string($ln) && preg_match('/^(?:=+|-+)$/D', $ln) === 1;
    }

    /*
     * Maps a heading level to its TYPE_H* constant.
     *
     * @param int $level 1 to 6
     * @return string|null the heading type, or null for any other level
     */
    public function getHeading($level)
    {
        switch ($level)
        {
            case 1: return self::TYPE_H1;
            case 2: return self::TYPE_H2;
            case 3: return self::TYPE_H3;
            case 4: return self::TYPE_H4;
            case 5: return self::TYPE_H5;
            case 6: return self::TYPE_H6;
            default: return null;
        }
    }
}
Note: this parser does not handle inline elements yet; I am still working out the proper way to do that.