#
# PHP port by Michel Fortin
#
or tags.
$prev_token_last_char = ""; # This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
$last_char = substr($t, -1); # Remember last char of this token before processing.
if (! $in_pre) {
$t = ProcessEscapes($t);
if ($convert_quot) {
$t = preg_replace('/"/', '"', $t);
}
if ($do_dashes) {
if ($do_dashes == 1) $t = EducateDashes($t);
if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);
}
if ($do_ellipses) $t = EducateEllipses($t);
# Note: backticks need to be processed before quotes.
if ($do_backticks) {
$t = EducateBackticks($t);
if ($do_backticks == 2) $t = EducateSingleBackticks($t);
}
if ($do_quotes) {
if ($t == "'") {
# Special case: single-character ' token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "’";
}
else {
$t = "‘";
}
}
else if ($t == '"') {
# Special case: single-character " token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "”";
}
else {
$t = "“";
}
}
else {
# Normal case:
$t = EducateQuotes($t);
}
}
if ($do_stupefy) $t = StupefyEntities($t);
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
return $result;
}
function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
global $smartypants_attr, $sp_tags_to_skip;
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_quotes="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
$do_backticks; # should we educate ``backticks'' -style quotes?
if ($attr == 0) {
# do nothing;
return $text;
}
else if ($attr == 2) {
# smarten ``backticks'' -style quotes
$do_backticks = 1;
}
else {
$do_backticks = 0;
}
# Special case to handle quotes at the very end of $text when preceded by
# an HTML tag. Add a space to give the quote education algorithm a bit of
# context, so that it can guess correctly that it's a closing quote:
$add_extra_space = 0;
if (preg_match("/>['\"]\\z/", $text)) {
$add_extra_space = 1; # Remember, so we can trim the extra space later.
$text .= " ";
}
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
$prev_token_last_char = ""; # This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
$last_char = substr($t, -1); # Remember last char of this token before processing.
if (! $in_pre) {
$t = ProcessEscapes($t);
if ($do_backticks) {
$t = EducateBackticks($t);
}
if ($t == "'") {
# Special case: single-character ' token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "’";
}
else {
$t = "‘";
}
}
else if ($t == '"') {
# Special case: single-character " token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "”";
}
else {
$t = "“";
}
}
else {
# Normal case:
$t = EducateQuotes($t);
}
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
if ($add_extra_space) {
preg_replace('/ \z/', '', $result); # Trim trailing space if we added one earlier.
}
return $result;
}
function SmartDashes($text, $attr = NULL, $ctx = NULL) {
global $smartypants_attr, $sp_tags_to_skip;
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_dashes="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
# reference to the subroutine to use for dash education, default to EducateDashes:
$dash_sub_ref = 'EducateDashes';
if ($attr == 0) {
# do nothing;
return $text;
}
else if ($attr == 2) {
# use old smart dash shortcuts, "--" for en, "---" for em
$dash_sub_ref = 'EducateDashesOldSchool';
}
else if ($attr == 3) {
# inverse of 2, "--" for em, "---" for en
$dash_sub_ref = 'EducateDashesOldSchoolInverted';
}
$tokens;
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
if (! $in_pre) {
$t = ProcessEscapes($t);
$t = $dash_sub_ref($t);
}
$result .= $t;
}
}
return $result;
}
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_ellipses="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
if ($attr == 0) {
# do nothing;
return $text;
}
$tokens;
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
if (! $in_pre) {
$t = ProcessEscapes($t);
$t = EducateEllipses($t);
}
$result .= $t;
}
}
return $result;
}
function EducateQuotes($_) {
#
# Parameter: String.
#
# Returns: The string, with "educated" curly quote HTML entities.
#
# Example input: "Isn't this fun?"
# Example output: “Isn’t this fun?”
#
# Make our own "punctuation" character class, because the POSIX-style
# [:PUNCT:] is only available in Perl 5.6 or later:
$punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
$_ = preg_replace(
array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
array('’', '”'), $_);
# Special case for double sets of quotes, e.g.:
# He said, "'Quoted' words in a larger quote."
$_ = preg_replace(
array("/\"'(?=\w)/", "/'\"(?=\w)/"),
array('“‘', '‘“'), $_);
# Special case for decade abbreviations (the '80s):
$_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
$close_class = '[^\ \t\r\n\[\{\(\-]';
$dec_dashes = '&\#8211;|&\#8212;';
# Get most opening single quotes:
$_ = preg_replace("{
(
\\s | # a whitespace char, or
| # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
$dec_dashes | # or decimal entities
&\\#x201[34]; # or hex
)
' # the quote
(?=\\w) # followed by a word character
}x", '\1‘', $_);
# Single closing quotes:
$_ = preg_replace("{
($close_class)?
'
(?(1)| # If $1 captured, then do nothing;
(?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
) # char or an 's' at a word ending position. This
# is a special case to handle something like:
# \"Custer's Last Stand.\"
}xi", '\1’', $_);
# Any remaining single quotes should be opening ones:
$_ = str_replace("'", '‘', $_);
# Get most opening double quotes:
$_ = preg_replace("{
(
\\s | # a whitespace char, or
| # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
$dec_dashes | # or decimal entities
&\\#x201[34]; # or hex
)
\" # the quote
(?=\\w) # followed by a word character
}x", '\1“', $_);
# Double closing quotes:
$_ = preg_replace("{
($close_class)?
\"
(?(1)|(?=\\s)) # If $1 captured, then do nothing;
# if not, then make sure the next char is whitespace.
}x", '\1”', $_);
# Any remaining quotes should be opening ones.
$_ = str_replace('"', '“', $_);
return $_;
}
function EducateBackticks($_) {
#
# Parameter: String.
# Returns: The string, with ``backticks'' -style double quotes
# translated into HTML curly quote entities.
#
# Example input: ``Isn't this fun?''
# Example output: “Isn't this fun?”
#
$_ = str_replace(array("``", "''",),
array('“', '”'), $_);
return $_;
}
function EducateSingleBackticks($_) {
#
# Parameter: String.
# Returns: The string, with `backticks' -style single quotes
# translated into HTML curly quote entities.
#
# Example input: `Isn't this fun?'
# Example output: ‘Isn’t this fun?’
#
$_ = str_replace(array("`", "'",),
array('‘', '’'), $_);
return $_;
}
function EducateDashes($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an em-dash HTML entity.
#
$_ = str_replace('--', '—', $_);
return $_;
}
function EducateDashesOldSchool($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an en-dash HTML entity, and each "---" translated to
# an em-dash HTML entity.
#
# em en
$_ = str_replace(array("---", "--",),
array('—', '–'), $_);
return $_;
}
function EducateDashesOldSchoolInverted($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an em-dash HTML entity, and each "---" translated to
# an en-dash HTML entity. Two reasons why: First, unlike the
# en- and em-dash syntax supported by
# EducateDashesOldSchool(), it's compatible with existing
# entries written before SmartyPants 1.1, back when "--" was
# only used for em-dashes. Second, em-dashes are more
# common than en-dashes, and so it sort of makes sense that
# the shortcut should be shorter to type. (Thanks to Aaron
# Swartz for the idea.)
#
# en em
$_ = str_replace(array("---", "--",),
array('–', '—'), $_);
return $_;
}
function EducateEllipses($_) {
#
# Parameter: String.
# Returns: The string, with each instance of "..." translated to
# an ellipsis HTML entity. Also converts the case where
# there are spaces between the dots.
#
# Example input: Huh...?
# Example output: Huh…?
#
$_ = str_replace(array("...", ". . .",), '…', $_);
return $_;
}
function StupefyEntities($_) {
#
# Parameter: String.
# Returns: The string, with each SmartyPants HTML entity translated to
# its ASCII counterpart.
#
# Example input: “Hello — world.”
# Example output: "Hello -- world."
#
# en-dash em-dash
$_ = str_replace(array('–', '—'),
array('-', '--'), $_);
# single quote open close
$_ = str_replace(array('‘', '’'), "'", $_);
# double quote open close
$_ = str_replace(array('“', '”'), '"', $_);
$_ = str_replace('…', '...', $_); # ellipsis
return $_;
}
function ProcessEscapes($_) {
#
# Parameter: String.
# Returns: The string, with after processing the following backslash
# escape sequences. This is useful if you want to force a "dumb"
# quote or other character to appear.
#
# Escape Value
# ------ -----
# \\ \
# \" "
# \' '
# \. .
# \- -
# \` `
#
$_ = str_replace(
array('\\', '\"', "\'", '\.', '\-', '\`'),
array('\', '"', ''', '.', '-', '`'), $_);
return $_;
}
# _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
# We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
#
# Parameter: String containing HTML markup.
# Returns: An array of the tokens comprising the input
# string. Each token is either a tag (possibly with nested,
# tags contained therein, such as , or a
# run of text between tags. Each element of the array is a
# two-element array; the first is either 'tag' or 'text';
# the second is the actual value.
#
#
# Regular expression derived from the _tokenize() subroutine in
# Brad Choate's MTRegex plugin.
#
#
$index = 0;
$tokens = array();
$match = '(?s:)|'. # comment
'(?s:<\?.*?\?>)|'. # processing instruction
# regular tags
'(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
foreach ($parts as $part) {
if (++$index % 2 && $part != '')
$tokens[] = array('text', $part);
else
$tokens[] = array('tag', $part);
}
return $tokens;
}
endif;
/*
PHP SmartyPants
===============
Description
-----------
This is a PHP translation of the original SmartyPants quote educator written in
Perl by John Gruber.
SmartyPants is a web publishing utility that translates plain ASCII
punctuation characters into "smart" typographic punctuation HTML
entities. SmartyPants can perform the following transformations:
* Straight quotes (`"` and `'`) into "curly" quote HTML entities
* Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
entities
* Dashes (`--` and `---`) into en- and em-dash entities
* Three consecutive dots (`...`) into an ellipsis entity
SmartyPants does not modify characters within ``, ``, ``,
`