From e0e0071949b4956d567e9a26f9c81249d45e1467 Mon Sep 17 00:00:00 2001 From: Gary Stidston-Broadbent Date: Sat, 17 Jul 2010 08:55:07 +0800 Subject: [PATCH] Added no_entities and no_markup to config. --- classes/kohana/markdown.php | 340 ++++++++++++++++++------------------ config/markdown.php | 5 +- 2 files changed, 170 insertions(+), 175 deletions(-) diff --git a/classes/kohana/markdown.php b/classes/kohana/markdown.php index 0e4a9d5..1f87672 100644 --- a/classes/kohana/markdown.php +++ b/classes/kohana/markdown.php @@ -83,8 +83,8 @@ class Kohana_Markdown "do_headers" => 10, "do_horizontal_rules" => 20, "do_lists" => 40, - "doCodeBlocks" => 50, - "doBlockQuotes" => 60, + "do_code_blocks" => 50, + "do_block_quotes" => 60, ); /* Transformations that occur *within* block-level tags */ @@ -103,9 +103,9 @@ class Kohana_Markdown * links like [this](). */ "doAutoLinks" => 30, - "encodeAmpsAndAngles" => 40, + "encode_amps_and_angles" => 40, - "doItalicsAndBold" => 50, + "do_italics_and_bold" => 50, "do_hard_breaks" => 60, ); @@ -144,6 +144,8 @@ class Kohana_Markdown $this->_config = $config; $this->suffix = ($this->_config['tab_width'] == 'html') ? '>' : ' />'; $this->tab_width = $this->_config['tab_width']; + $this->no_entities = $this->_config['no_entities']; + $this->no_markup = $this->_config['no_markup']; $this->init_detab(); $this->prepare_italics_and_bold(); @@ -560,7 +562,7 @@ class Kohana_Markdown } # Finally form paragraph and restore hashed blocks. - $text = $this->formParagraphs($text); + $text = $this->form_paragraphs($text); return $text; } @@ -706,12 +708,12 @@ class Kohana_Markdown if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); + $url = $this->encode_attribute($url); $result = "titles[$link_id] ) ) { $title = $this->titles[$link_id]; - $title = $this->encodeAttribute($title); + $title = $this->encode_attribute($title); $result .= " title=\"$title\""; } @@ -731,11 +733,11 @@ class Kohana_Markdown $url = $matches[3] == '' ? $matches[4] : $matches[3]; $title =& $matches[7]; - $url = $this->encodeAttribute($url); + $url = $this->encode_attribute($url); $result = "encodeAttribute($title); + $title = $this->encode_attribute($title); $result .= " title=\"$title\""; } @@ -811,13 +813,13 @@ class Kohana_Markdown $link_id = strtolower($alt_text); } - $alt_text = $this->encodeAttribute($alt_text); + $alt_text = $this->encode_attribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); + $url = $this->encode_attribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; - $title = $this->encodeAttribute($title); + $title = $this->encode_attribute($title); $result .= " title=\"$title\""; } $result .= $this->suffix; @@ -838,13 +840,13 @@ class Kohana_Markdown $url = $matches[3] == '' ? $matches[4] : $matches[3]; $title =& $matches[7]; - $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeAttribute($url); + $alt_text = $this->encode_attribute($alt_text); + $url = $this->encode_attribute($url); $result = "\"$alt_text\"";encodeAttribute($title); + $title = $this->encode_attribute($title); $result .= " title=\"$title\""; } $result .= $this->suffix; @@ -958,7 +960,7 @@ class Kohana_Markdown '; /* We use different prefix before nested lists than top-level lists. - * See extended comment in _ProcessListItems(). + * See extended comment in _process_list_items(). */ if ($this->list_level) { $text = preg_replace_callback('{ @@ -991,39 +993,40 @@ class Kohana_Markdown $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); $list .= "\n"; - $result = $this->processListItems($list, $marker_any_re); + $result = $this->process_list_items($list, $marker_any_re); $result = $this->hash_block("<$list_type>\n" . $result . ""); return "\n". $result ."\n\n"; } - protected function processListItems($list_str, $marker_any_re) + /** + * Process the contents of a single ordered or unordered list, splitting it + * into individual list items. + */ + protected function process_list_items($list_str, $marker_any_re) { - # - # Process the contents of a single ordered or unordered list, splitting it - # into individual list items. - # - # The $this->list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - + /* + * The $this->list_level global keeps track of when we're inside a list. + * Each time we enter a list, we increment it; when we leave a list, + * we decrement. If it's zero, we're not in a list anymore. + * + * We do this because when we're not inside a list, we want to treat + * something like this: + * + * I recommend upgrading to version + * 8. Oops, now this line is treated + * as a sub-list. + * + * As a single paragraph, despite the fact that the second line starts + * with a digit-period-space sequence. + * + * Whereas when we're inside a list (or sub-list), that line will be + * treated as the start of a sub-list. What a kludge, huh? This is + * an aspect of Markdown's syntax that's hard to parse perfectly + * without resorting to mind-reading. Perhaps the solution is to + * change the syntax rules such that sub-lists must start with a + * starting cardinal number; e.g. "1." or "a.". + */ $this->list_level++; /* trim trailing blank lines: */ @@ -1039,13 +1042,13 @@ class Kohana_Markdown (?:(\n+(?=\n))|\n) # tailing blank line = $5 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) }xm', - array(&$this, '_processListItems_callback'), $list_str); + array(&$this, '_process_list_items_callback'), $list_str); $this->list_level--; return $list_str; } - protected function _processListItems_callback($matches) + protected function _process_list_items_callback($matches) { $item = $matches[4]; $leading_line =& $matches[1]; @@ -1053,15 +1056,15 @@ class Kohana_Markdown $marker_space = $matches[3]; $tailing_blank_line =& $matches[5]; + /* Replace marker with the appropriate whitespace indentation */ if ($leading_line || $tailing_blank_line || preg_match('/\n{2,}/', $item)) { - /* Replace marker with the appropriate whitespace indentation */ $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; $item = $this->run_block_gamut($this->outdent($item)."\n"); - } - else { - /* Recursion for sub-lists: */ + + /* Recursion for sub-lists: */ + } else { $item = $this->do_lists($this->outdent($item)); $item = preg_replace('/\n+$/', '', $item); $item = $this->run_span_gamut($item); @@ -1070,11 +1073,11 @@ class Kohana_Markdown return "
  • " . $item . "
  • \n"; } - protected function doCodeBlocks($text) + /** + * Process Markdown `
    ` blocks.
    +	 */
    +	protected function do_code_blocks($text)
     	{
    -	#
    -	#	Process Markdown `
    ` blocks.
    -	#
     		$text = preg_replace_callback('{
     				(?:\n\n|\A\n?)
     				(	            # $1 = the code block -- one or more lines, starting with a space/tab
    @@ -1085,96 +1088,92 @@ class Kohana_Markdown
     				)
     				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
     			}xm',
    -			array(&$this, '_doCodeBlocks_callback'), $text);
    +			array(&$this, '_do_code_blocks_callback'), $text);
     
     		return $text;
     	}
     
    -	protected function _doCodeBlocks_callback($matches)
    +	protected function _do_code_blocks_callback($matches)
     	{
     		$codeblock = $matches[1];
     
     		$codeblock = $this->outdent($codeblock);
     		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
     
    -		# trim leading newlines and trailing newlines
    +		/* trim leading newlines and trailing newlines */
     		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
     
     		$codeblock = "
    $codeblock\n
    "; return "\n\n".$this->hash_block($codeblock)."\n\n"; } - protected function makeCodeSpan($code) + /** + * Create a code span markup for $code. Called from handleSpanToken. + */ + protected function make_code_span($code) { - # - # Create a code span markup for $code. Called from handleSpanToken. - # $code = htmlspecialchars(trim($code), ENT_NOQUOTES); return $this->hash_part("$code"); } + /** + * Prepare regular expressions for searching emphasis tokens in any + * context. + */ protected function prepare_italics_and_bold() { - # - # Prepare regular expressions for searching emphasis tokens in any - # context. - # foreach ($this->em_relist as $em => $em_re) { foreach ($this->strong_relist as $strong => $strong_re) { - # Construct list of allowed token expressions. + /* Construct list of allowed token expressions. */ $token_relist = array(); if (isset($this->em_strong_relist["$em$strong"])) { $token_relist[] = $this->em_strong_relist["$em$strong"]; } $token_relist[] = $em_re; $token_relist[] = $strong_re; - - # Construct master expression from list. + + /* Construct master expression from list. */ $token_re = '{('. implode('|', $token_relist) .')}'; $this->em_strong_prepared_relist["$em$strong"] = $token_re; } } } - protected function doItalicsAndBold($text) + protected function do_italics_and_bold($text) { $token_stack = array(''); $text_stack = array(''); $em = ''; $strong = ''; $tree_char_em = false; - + while (1) { - # - # Get prepared regular expression for seraching emphasis tokens - # in current context. - # + /* Get prepared regular expression for seraching emphasis tokens in + current context. */ $token_re = $this->em_strong_prepared_relist["$em$strong"]; - - # - # Each loop iteration search for the next emphasis token. - # Each token is then passed to handleSpanToken. - # + + /* Each loop iteration search for next emphasis token. Each token + then passed to handleSpanToken. */ $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); $text_stack[0] .= $parts[0]; $token =& $parts[1]; $text =& $parts[2]; - + if (empty($token)) { - # Reached end of text span: empty stack without emitting. - # any more emphasis. + /* Reached end of text span: empty stack without emitting. any + more emphasis. */ while ($token_stack[0]) { $text_stack[1] .= array_shift($token_stack); $text_stack[0] .= array_shift($text_stack); } break; } - + $token_len = strlen($token); if ($tree_char_em) { - # Reached closing marker while inside a three-char emphasis. + /* Reached closing marker while inside a three-char emphasis. */ if ($token_len == 3) { - # Three-char closing marker, close em and strong. + /* Three-char closing marker, close em and strong. */ array_shift($token_stack); $span = array_shift($text_stack); $span = $this->run_span_gamut($span); @@ -1183,8 +1182,8 @@ class Kohana_Markdown $em = ''; $strong = ''; } else { - # Other closing marker: close one em or strong and - # change current token state to match the other + /* Other closing marker: close one em or strong and change + current token state to match the other */ $token_stack[0] = str_repeat($token{0}, 3-$token_len); $tag = $token_len == 2 ? "strong" : "em"; $span = $text_stack[0]; @@ -1196,8 +1195,8 @@ class Kohana_Markdown $tree_char_em = false; } else if ($token_len == 3) { if ($em) { - # Reached closing marker for both em and strong. - # Closing strong marker: + /* Reached closing marker for both em and strong. + Closing strong marker: */ for ($i = 0; $i < 2; ++$i) { $shifted_token = array_shift($token_stack); $tag = strlen($shifted_token) == 2 ? "strong" : "em"; @@ -1208,8 +1207,8 @@ class Kohana_Markdown $$tag = ''; # $$tag stands for $em or $strong } } else { - # Reached opening three-char emphasis marker. Push on token - # stack; will be handled by the special condition above. + /* Reached opening three-char emphasis marker. Push on token + stack; will be handled by the special condition above. */ $em = $token{0}; $strong = "$em$em"; array_unshift($token_stack, $token); @@ -1218,12 +1217,12 @@ class Kohana_Markdown } } else if ($token_len == 2) { if ($strong) { - # Unwind any dangling emphasis marker: + /* Unwind any dangling emphasis marker: */ if (strlen($token_stack[0]) == 1) { $text_stack[1] .= array_shift($token_stack); $text_stack[0] .= array_shift($text_stack); } - # Closing strong marker: + /* Closing strong marker: */ array_shift($token_stack); $span = array_shift($text_stack); $span = $this->run_span_gamut($span); @@ -1236,10 +1235,10 @@ class Kohana_Markdown $strong = $token; } } else { - # Here $token_len == 1 + /* Here $token_len == 1 */ if ($em) { if (strlen($token_stack[0]) == 1) { - # Closing emphasis marker: + /* Closing emphasis marker: */ array_shift($token_stack); $span = array_shift($text_stack); $span = $this->run_span_gamut($span); @@ -1259,7 +1258,7 @@ class Kohana_Markdown return $text_stack[0]; } - protected function doBlockQuotes($text) + protected function do_block_quotes($text) { $text = preg_replace_callback('/ ( # Wrap whole match in $1 @@ -1271,59 +1270,55 @@ class Kohana_Markdown )+ ) /xm', - array(&$this, '_doBlockQuotes_callback'), $text); + array(&$this, '_do_block_quotes_callback'), $text); return $text; } - protected function _doBlockQuotes_callback($matches) + protected function _do_block_quotes_callback($matches) { $bq = $matches[1]; - # trim one level of quoting - trim whitespace-only lines + /* trim one level of quoting - trim whitespace-only lines */ $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); $bq = $this->run_block_gamut($bq); # recurse $bq = preg_replace('/^/m', " ", $bq); - # These leading spaces cause problem with
     content, 
    -		# so we need to fix that:
    +		/* Leading spaces cause problem with 
     content, so fix that: */
     		$bq = preg_replace_callback('{(\s*
    .+?
    )}sx', - array(&$this, '_doBlockQuotes_callback2'), $bq); + array(&$this, '_do_block_quotes_callback2'), $bq); return "\n". $this->hash_block("
    \n$bq\n
    ")."\n\n"; } - protected function _doBlockQuotes_callback2($matches) + protected function _do_block_quotes_callback2($matches) { $pre = $matches[1]; $pre = preg_replace('/^ /m', '', $pre); return $pre; } - protected function formParagraphs($text) + /** + * Params: + * $text - string to process with html

    tags + */ + protected function form_paragraphs($text) { - # - # Params: - # $text - string to process with html

    tags - # - # Strip leading and trailing lines: + /* Strip leading and trailing lines: */ $text = preg_replace('/\A\n+|\n+\z/', '', $text); $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); - # - # Wrap

    tags and unhashify HTML blocks - # + /* Wrap

    tags and unhashify HTML blocks */ foreach ($grafs as $key => $value) { if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { - # Is a paragraph. + /* Is a paragraph. */ $value = $this->run_span_gamut($value); $value = preg_replace('/^([ ]*)/', "

    ", $value); $value .= "

    "; $grafs[$key] = $this->unhash($value); } else { - # Is a block. - # Modify elements of @grafs in-place... + /* Is a block. Modify elements of @grafs in-place... */ $graf = $value; $block = $this->html_hashes[$graf]; $graf = $block; @@ -1334,33 +1329,33 @@ class Kohana_Markdown return implode("\n\n", $grafs); } - protected function encodeAttribute($text) + /** + * Encode text for a double-quoted HTML attribute. This function + * is *not* suitable for attributes enclosed in single quotes. + */ + protected function encode_attribute($text) { - # - # Encode text for a double-quoted HTML attribute. This function - # is *not* suitable for attributes enclosed in single quotes. - # - $text = $this->encodeAmpsAndAngles($text); + $text = $this->encode_amps_and_angles($text); $text = str_replace('"', '"', $text); return $text; } - protected function encodeAmpsAndAngles($text) + /** + * Smart processing for ampersands and angle brackets that need to + * be encoded. Valid character entities are left alone unless the + * no-entities mode is set. + */ + protected function encode_amps_and_angles($text) { - # - # Smart processing for ampersands and angle brackets that need to - # be encoded. Valid character entities are left alone unless the - # no-entities mode is set. - # if ($this->no_entities) { $text = str_replace('&', '&', $text); } else { - # Ampersand-encoding based entirely on Nat Irons's Amputator - # MT plugin: + /* Ampersand-encoding based entirely on Nat Irons's Amputator + MT plugin: */ $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&', $text);; } - # Encode remaining <'s + /* Encode remaining <'s */ $text = str_replace('<', '<', $text); return $text; @@ -1371,7 +1366,7 @@ class Kohana_Markdown $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', array(&$this, '_doAutoLinks_url_callback'), $text); - # Email addresses: + /* Email addresses: */ $text = preg_replace_callback('{ < (?:mailto:)? @@ -1397,7 +1392,7 @@ class Kohana_Markdown protected function _doAutoLinks_url_callback($matches) { - $url = $this->encodeAttribute($matches[1]); + $url = $this->encode_attribute($matches[1]); $link = "
    $url"; return $this->hash_part($link); } @@ -1409,34 +1404,33 @@ class Kohana_Markdown return $this->hash_part($link); } + /** + * Input: an email address, e.g. "foo@example.com" + * + * Output: the email address as a mailto link, with each character + * of the address encoded as either a decimal or hex entity, in + * the hopes of foiling most address harvesting spam bots. E.g.: + * + *

    foo@exampl + * e.com

    + * + * Based by a filter by Matthew Wickline, posted to BBEdit-Talk. + * With some optimizations by Milian Wolff. + */ protected function encodeEmailAddress($addr) { - # - # Input: an email address, e.g. "foo@example.com" - # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: - # - #

    foo@exampl - # e.com

    - # - # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. - # With some optimizations by Milian Wolff. - # $addr = "mailto:" . $addr; $chars = preg_split('/(? $char) { $ord = ord($char); - # Ignore non-ascii chars. + /* Ignore non-ascii chars. */ if ($ord < 128) { $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. - # roughly 10% raw, 45% hex, 45% dec - # '@' *must* be encoded. I insist. + /* roughly 10% raw, 45% hex, 45% dec '@' *must* be encoded. I insist. */ if ($r > 90 && $char != '@') /* do nothing */; else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; else $chars[$key] = '&#'.$ord.';'; @@ -1450,12 +1444,12 @@ class Kohana_Markdown return $addr; } + /** + * Take the string $str and parse it into tokens, hashing embeded HTML, + * escaped characters and handling code spans. + */ protected function parseSpan($str) { - # - # Take the string $str and parse it into tokens, hashing embeded HTML, - # escaped characters and handling code spans. - # $output = ''; $span_re = '{ @@ -1506,12 +1500,12 @@ class Kohana_Markdown return $output; } + /** + * Handle $token provided by parseSpan by determining its nature and + * returning the corresponding value that should replace it. + */ protected function handleSpanToken($token, &$str) { - # - # Handle $token provided by parseSpan by determining its nature and - # returning the corresponding value that should replace it. - # switch ($token{0}) { case "\\": return $this->hash_part("&#". ord($token{1}). ";"); @@ -1521,7 +1515,7 @@ class Kohana_Markdown $str, $matches)) { $str = $matches[2]; - $codespan = $this->makeCodeSpan($matches[1]); + $codespan = $this->make_code_span($matches[1]); return $this->hash_part($codespan); } return $token; // return as text since no ending marker found. @@ -1530,19 +1524,19 @@ class Kohana_Markdown } } + /** + * Remove one level of line-leading tabs or spaces + */ protected function outdent($text) { - # - # Remove one level of line-leading tabs or spaces - # return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); } + /** + * Replace tabs with the appropriate amount of space. + */ protected function detab($text) { - # - # Replace tabs with the appropriate amount of space. - # # For each line we separate the line in blocks delemited by # tab characters. Then we reconstruct every line by adding the # appropriate number of space between each blocks. @@ -1572,25 +1566,25 @@ class Kohana_Markdown return $line; } + /** + * Check for the availability of the function in the `utf8_strlen` property + * (initially `mb_strlen`). If the function is not available, create a + * function that will loosely count the number of UTF-8 characters with a + * regular expression. + */ protected function init_detab() { - # - # Check for the availability of the function in the `utf8_strlen` property - # (initially `mb_strlen`). If the function is not available, create a - # function that will loosely count the number of UTF-8 characters with a - # regular expression. - # if (function_exists($this->utf8_strlen)) return; $this->utf8_strlen = create_function('$text', 'return preg_match_all( "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", $text, $m);'); } + /** + * Swap back in all the tags hashed by _Hash_HTML_blocks. + */ protected function unhash($text) { - # - # Swap back in all the tags hashed by _Hash_HTML_blocks. - # return preg_replace_callback('/(.)\x1A[0-9]+\1/', array(&$this, '_unhash_callback'), $text); } diff --git a/config/markdown.php b/config/markdown.php index e800f21..5037003 100644 --- a/config/markdown.php +++ b/config/markdown.php @@ -1,11 +1,12 @@ array + 'default' => array ( 'type' => 'xhtml', // html or xhtml 'tab_width' => 4, // Tab width for output - // Many more options to come + 'no_entities' => FALSE, + 'no_markup' => FALSE ) ); ?>