typography_helper.php

Go to the documentation of this file.
00001 <?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');
00002 /**
00003  * CodeIgniter
00004  *
00005  * An open source application development framework for PHP 4.3.2 or newer
00006  *
00007  * @package             CodeIgniter
00008  * @author              ExpressionEngine Dev Team
00009  * @copyright   Copyright (c) 2006, EllisLab, Inc.
00010  * @license             http://codeigniter.com/user_guide/license.html
00011  * @link                http://codeigniter.com
00012  * @since               Version 1.0
00013  * @filesource
00014  */
00015 
00016 // ------------------------------------------------------------------------
00017 
00018 /**
00019  * CodeIgniter Typography Helpers
00020  *
00021  * @package             CodeIgniter
00022  * @subpackage  Helpers
00023  * @category    Helpers
00024  * @author              ExpressionEngine Dev Team
00025  * @link                http://codeigniter.com/user_guide/helpers/typography_helper.html
00026  */
00027 
00028 // ------------------------------------------------------------------------
00029 
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * The input is cut at every occurrence of the literal text "pre>".
 * Even-numbered pieces are treated as lying outside a PRE element and
 * are passed through nl2br(); odd-numbered pieces are left untouched.
 * The pieces are then glued back together with the same "pre>" text.
 *
 * @access      public
 * @param       string  text that may contain <pre>...</pre> sections
 * @return      string  the text with <br /> inserted outside those sections
 */
if ( ! function_exists('nl2br_except_pre'))
{
        function nl2br_except_pre($str)
        {
                $segments = explode("pre>", $str);

                foreach ($segments as $index => $segment)
                {
                        // Pieces 0, 2, 4, ... sit outside a PRE element
                        if ($index % 2 == 0)
                        {
                                $segments[$index] = nl2br($segment);
                        }
                }

                // Re-insert the delimiter that explode() consumed
                return implode("pre>", $segments);
        }
}
00063         
00064 // ------------------------------------------------------------------------
00065 
/**
 * Auto Typography Wrapper Function
 *
 * Procedural shortcut: creates an Auto_typography object and returns
 * the result of running its convert() method on the given text.
 *
 * @access      public
 * @param       string  text to format
 * @return      string  whatever Auto_typography::convert() returns
 */
if ( ! function_exists('auto_typography'))
{
        function auto_typography($str)
        {
                $typography = new Auto_typography();
                $formatted = $typography->convert($str);

                return $formatted;
        }
}
00082         
00083 // ------------------------------------------------------------------------
00084 
/**
 * Auto Typography Class
 *
 * Turns plain text into HTML: double newlines become paragraphs, single
 * newlines become <br /> tags, and straight quotes, double hyphens and
 * triple periods are replaced with their typographic HTML entities.
 * Text inside the "skip" elements (see $skip_elements) is passed through
 * without newline formatting.
 *
 * @access              private
 * @category    Helpers
 * @author              ExpressionEngine Dev Team
 * @link                http://codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

        // Block level elements that should not be wrapped inside <p> tags
        // (regex alternation, interpolated directly into the patterns below)
        var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

        // Elements that should not have <p> and <br /> tags within them.
        var $skip_elements      = 'pre|ol|ul';

        // Tags we want the parser to completely ignore when splitting the string.
        var $ignore_elements = 'a|b|i|em|strong|span|img|li';


        /**
         * Main Processing Function
         *
         * Pipeline: normalise newlines, protect quotes that live inside tags,
         * mark block/inline tags, split the text at tags, format the text
         * chunks with format_newlines(), run format_characters() and finally
         * strip the temporary markers and paragraph artifacts.
         *
         * @access      public
         * @param       string  text to format
         * @return      string  formatted HTML ('' when the input is empty)
         */
        function convert($str)
        {
                if ($str == '')
                {
                        return '';
                }

                // Pad with a space on each side; the resulting "<p> " / " </p>"
                // are removed again in the final clean up.
                $str = ' '.$str.' ';

                // Standardize Newlines to make matching easier
                if (strpos($str, "\r") !== FALSE)
                {
                        $str = str_replace(array("\r\n", "\r"), "\n", $str);
                }

                /*
                 * Reduce line breaks
                 *
                 * If there are more than two consecutive line
                 * breaks we'll compress them down to a maximum
                 * of two since there's no benefit to more.
                 *
                 */
                $str = preg_replace("/\n\n+/", "\n\n", $str);

                /*
                 * Convert quotes within tags to temporary marker
                 *
                 * We don't want quotes converted within
                 * tags so we'll temporarily convert them to
                 * {@DQ} and {@SQ}
                 *
                 */
                if (preg_match_all("#<.+?>#si", $str, $matches))
                {
                        for ($i = 0; $i < count($matches['0']); $i++)
                        {
                                $str = str_replace($matches['0'][$i],
                                                   str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
                                                   $str);
                        }
                }


                /*
                 * Add closing/opening paragraph tags before/after "block" elements
                 *
                 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
                 * wrapped in paragraph tags we will add a closing </p> tag just before
                 * each block element starts and an opening <p> tag right after the block element
                 * ends.  Later on we'll do some further clean up.
                 *
                 */
                $str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
                $str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

                /*
                 * Convert "ignore" tags to temporary marker
                 *
                 * The parser splits out the string at every tag
                 * it encounters.  Certain inline tags, like image
                 * tags, links, span tags, etc. will be adversely
                 * affected if they are split out so we'll convert
                 * the opening < temporarily to: {@TAG}
                 *
                 */
                $str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

                /*
                 * Split the string at every tag
                 *
                 * This creates an array with this prototype:
                 *
                 *      [array]
                 *      {
                 *              [0] = <opening tag>
                 *              [1] = Content contained between the tags
                 *              [2] = <closing tag>
                 *              Etc...
                 *      }
                 *
                 */
                $chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

                /*
                 * Build our finalized string
                 *
                 * We'll cycle through the array, skipping tags,
                 * and processing the contained text
                 *
                 */
                $str = '';
                $process = TRUE;
                foreach ($chunks as $chunk)
                {
                        /*
                         * Are we dealing with a tag?
                         *
                         * If so, we'll skip the processing for this cycle.
                         * We'll also set the "process" flag which allows us
                         * to skip <pre> tags and a few other things.
                         *
                         */
                        if (preg_match("#<(/*)(".$this->block_elements.").*?>#", $chunk, $match))
                        {
                                if (preg_match("#".$this->skip_elements."#", $match['2']))
                                {
                                        // Opening skip tag turns processing off,
                                        // its closing tag turns it back on.
                                        $process =  ($match['1'] == '/') ? TRUE : FALSE;
                                }

                                $str .= $chunk;
                                continue;
                        }

                        // Inside a skip element: copy the chunk through untouched
                        if ($process == FALSE)
                        {
                                $str .= $chunk;
                                continue;
                        }

                        //  Convert Newlines into <p> and <br /> tags
                        $str .= $this->format_newlines($chunk);
                }

                // FINAL CLEAN UP
                // IMPORTANT:  DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

                /*
                 * Clean up paragraph tags before/after "block" elements
                 *
                 * Earlier we added <p></p> tags before/after block level elements.
                 * Then, we added paragraph tags around double line breaks.  This
                 * potentially created incorrectly formatted paragraphs so we'll
                 * clean it up here.
                 *
                 */
                $str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
                $str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

                // Convert Quotes and other characters
                $str = $this->format_characters($str);

                // Fix an artifact that happens during the paragraph replacement
                $str = preg_replace('#(<p>\n*</p>)#', '', $str);

                // If the user submitted their own paragraph tags with class data
                // in them we will retain them instead of using our tags.
                $str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

                // Final clean up: collapse doubled paragraph tags, drop the padding
                // spaces next to <p>/</p>, and restore the temporary markers.
                // str_replace() applies the pairs in array order.
                $str = str_replace(
                                        array(
                                                '</p></p>',
                                                '</p><p>',
                                                '<p> ',
                                                ' </p>',
                                                '{@TAG}',
                                                '{@DQ}',
                                                '{@SQ}',
                                                '<p></p>'
                                        ),
                                        array(
                                                '</p>',
                                                '<p>',
                                                '<p>',
                                                '</p>',
                                                '<',
                                                '"',
                                                "'",
                                                ''
                                        ),
                                        $str
                                );

                return $str;
        }

        // --------------------------------------------------------------------

        /**
         * Format Characters
         *
         * This function mainly converts double and single quotes
         * to entities, but since these are directional, it does
         * it based on some rules.  It also converts em-dashes
         * and a couple other things.
         *
         * Most passes below find a character sequence with strpos(), look at
         * the characters around it, and then call str_replace() on the whole
         * string, so every identical before/sequence/after triple in the
         * string is replaced at once.
         *
         * @access      public
         * @param       string  text to convert
         * @return      string  text with typographic entities substituted
         */
        function format_characters($str)
        {
                // Simple context => entity translations; applied further down,
                // after the positional passes.  Order matters to str_replace().
                $table = array(
                                                ' "'            => " &#8220;",
                                                '" '            => "&#8221; ",
                                                " '"            => " &#8216;",
                                                "' "            => "&#8217; ",

                                                '>"'            => ">&#8220;",
                                                '"<'            => "&#8221;<",
                                                ">'"            => ">&#8216;",
                                                "'<"            => "&#8217;<",

                                                "\"."           => "&#8221;.",
                                                "\","           => "&#8221;,",
                                                "\";"           => "&#8221;;",
                                                "\":"           => "&#8221;:",
                                                "\"!"           => "&#8221;!",
                                                "\"?"           => "&#8221;?",

                                                ".  "           => ".&nbsp; ",
                                                "?  "           => "?&nbsp; ",
                                                "!  "           => "!&nbsp; ",
                                                ":  "           => ":&nbsp; ",
                                        );

                // These deal with quotes within quotes, like:  "'hi here'"
                $start = 0;
                $space = array("\n", "\t", " ");

                // Pass 1: a double quote immediately followed by a single quote
                while(TRUE)
                {
                        $current = strpos(substr($str, $start), "\"'");

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+2, 1);

                        if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
                        {
                                // Text follows: treat the pair as opening quotes
                                $str = str_replace(     $one_before."\"'".$one_after,
                                                        $one_before."&#8220;&#8216;".$one_after,
                                                        $str);
                        }
                        elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
                        {
                                // Text precedes and whitespace or a tag follows: closing quotes
                                $str = str_replace(     $one_before."\"'".$one_after,
                                                        $one_before."&#8221;&#8217;".$one_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                $start = 0;

                // Pass 2: a single quote immediately followed by a double quote
                while(TRUE)
                {
                        $current = strpos(substr($str, $start), "'\"");

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+2, 1);

                        if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
                        {
                                // Whitespace before, text after: opening quotes
                                $str = str_replace(     $one_before."'\"".$one_after,
                                                        $one_before."&#8216;&#8220;".$one_after,
                                                        $str);
                        }
                        elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
                        {
                                // Text before: closing quotes
                                $str = str_replace(     $one_before."'\"".$one_after,
                                                        $one_before."&#8217;&#8221;".$one_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                // Are there quotes within a word, as in:  ("something")
                // Only converted when neither neighbouring character is whitespace.
                if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
                {
                        for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
                        {
                                if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
                                {
                                        $str = str_replace(     $matches['0'][$i],
                                                                $matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
                                                                $str);
                                }
                        }
                }

                // Same rule for single-quoted words
                if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
                {
                        for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
                        {
                                if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
                                {
                                        $str = str_replace(     $matches['0'][$i],
                                                                $matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
                                                                $str);
                                }
                        }
                }

                // How about one apostrophe, as in Rick's
                $start = 0;

                while(TRUE)
                {
                        $current = strpos(substr($str, $start), "'");

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+1, 1);

                        // Non-whitespace on both sides: an apostrophe inside a word
                        if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
                        {
                                $str = str_replace(     $one_before."'".$one_after,
                                                        $one_before."&#8217;".$one_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                // Em-dashes
                $start = 0;
                while(TRUE)
                {
                        $current = strpos(substr($str, $start), "--");

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+2, 1);
                        $two_before = substr($str, $start+$current-2, 1);
                        $two_after = substr($str, $start+$current+3, 1);

                        // Convert "word--word" and "word -- word"; in the second
                        // form trim() removes the single spaces around the dash.
                        if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
                                OR
                                ( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
                                )
                        {
                                $str = str_replace(     $two_before.$one_before."--".$one_after.$two_after,
                                                        $two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                // Ellipsis: "word..." before whitespace/<br />/</p>, and
                // "...word" after whitespace/<br />/</p>
                $str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
                $str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

                // Run the translation array we defined above
                $str = str_replace(array_keys($table), array_values($table), $str);

                // If there are any stray double quotes we'll catch them here

                $start = 0;

                while(TRUE)
                {
                        $current = strpos(substr($str, $start), '"');

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+1, 1);

                        if ( ! in_array($one_after, $space, TRUE))
                        {
                                // Followed by non-whitespace: opening quote
                                $str = str_replace(     $one_before.'"'.$one_after,
                                                        $one_before."&#8220;".$one_after,
                                                        $str);
                        }
                        elseif( ! in_array($one_before, $space, TRUE))
                        {
                                // Preceded by non-whitespace and followed by whitespace:
                                // closing quote.  The search string must contain the
                                // double quote located above; searching for a single
                                // quote here left the stray double quote unconverted.
                                $str = str_replace(     $one_before.'"'.$one_after,
                                                        $one_before."&#8221;".$one_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                // ... and the same for any stray single quotes
                $start = 0;

                while(TRUE)
                {
                        $current = strpos(substr($str, $start), "'");

                        if ($current === FALSE) break;

                        $one_before = substr($str, $start+$current-1, 1);
                        $one_after = substr($str, $start+$current+1, 1);

                        if ( ! in_array($one_after, $space, TRUE))
                        {
                                // Followed by non-whitespace: opening quote
                                $str = str_replace(     $one_before."'".$one_after,
                                                        $one_before."&#8216;".$one_after,
                                                        $str);
                        }
                        elseif( ! in_array($one_before, $space, TRUE))
                        {
                                // Preceded by non-whitespace: closing quote
                                $str = str_replace(     $one_before."'".$one_after,
                                                        $one_before."&#8217;".$one_after,
                                                        $str);
                        }

                        $start = $start+$current+2;
                }

                return $str;
        }

        // --------------------------------------------------------------------

        /**
         * Format Newlines
         *
         * Converts newline characters into either <p> tags or <br />.
         * A double newline closes the current paragraph and opens a new one;
         * a single newline between two non-newline characters gets a <br />
         * inserted in front of it.  The whole result is wrapped in <p>...</p>.
         *
         * @access      public
         * @param       string  text chunk (no tags expected)
         * @return      string  the chunk wrapped in paragraph tags, or the
         *                      input unchanged when it is empty
         */
        function format_newlines($str)
        {
                if ($str == '')
                {
                        return $str;
                }

                // No newlines at all: just one paragraph
                if (strpos($str, "\n") === FALSE)
                {
                        return '<p>'.$str.'</p>';
                }

                $str = str_replace("\n\n", "</p>\n\n<p>", $str);
                $str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

                return '<p>'.$str.'</p>';
        }
}
00546 
00547 
00548 
00549 /* End of file typography_helper.php */
00550 /* Location: ./system/helpers/typography_helper.php */