Public Member Functions | |
| convert ($str) | |
| Main Processing Function. | |
| format_characters ($str) | |
| Format Characters. | |
| format_newlines ($str) | |
| Format Newlines. | |
Public Attributes | |
| $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul' | |
| $skip_elements = 'pre|ol|ul' | |
| $ignore_elements = 'a|b|i|em|strong|span|img|li' | |
Definition at line 94 of file typography_helper.php.
| Auto_typography::convert | ( | $ | str | ) |
Main Processing Function.
Definition at line 110 of file typography_helper.php.
References format_characters(), and format_newlines().
/**
 * Main Processing Function.
 *
 * Turns plain text (optionally containing HTML) into paragraph-wrapped
 * markup: newlines are normalised, text between tags is wrapped in
 * <p>/<br /> by format_newlines(), quotes and similar characters are
 * converted by format_characters(), and temporary markers are restored.
 *
 * Reads $this->block_elements, $this->skip_elements and
 * $this->ignore_elements (regex alternations of tag names).
 *
 * @param	string	$str	text to format
 * @return	string	formatted text, or '' when $str compares equal to ''
 */
function convert($str)
{
	if ($str == '')
	{
		return '';
	}

	// Pad both ends with one space and normalise every line ending to "\n".
	// (str_replace is a no-op when the text holds no "\r".)
	$str = str_replace(array("\r\n", "\r"), "\n", ' '.$str.' ');

	// Collapse any run of two or more newlines down to exactly two.
	$str = preg_replace("/\n\n+/", "\n\n", $str);

	// Quotes inside tags must survive character formatting untouched, so
	// they are swapped for the {@SQ} / {@DQ} markers until the very end.
	if (preg_match_all("#<.+?>#si", $str, $tags))
	{
		foreach ($tags[0] as $tag)
		{
			$masked = str_replace(array("'", '"'), array('{@SQ}', '{@DQ}'), $tag);
			$str = str_replace($tag, $masked, $str);
		}
	}

	// Block elements are not wrapped in paragraphs: close a paragraph right
	// before each opening block tag and open one right after each closing
	// block tag. The surplus tags this creates are cleaned up further down.
	$block = $this->block_elements;
	$str = preg_replace("#(<)(".$block.")(.*?>)#", "</p>\\1\\2\\3", $str);
	$str = preg_replace("#(</)(".$block.")(.*?>)#", "\\1\\2\\3<p>", $str);

	// Inline ("ignore") tags must not be split out below, so their leading
	// "<" is temporarily replaced with {@TAG}.
	$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

	// Split at every remaining tag, keeping the tags themselves as chunks
	// and dropping empty pieces.
	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

	// Rebuild the string: block tags are copied verbatim, text is passed
	// through format_newlines() unless we are inside a "skip" element.
	$str = '';
	$process = TRUE;

	foreach ($chunks as $piece)
	{
		$is_block_tag = preg_match("#<(/*)(".$block.").*?>#", $piece, $tag);

		// Entering a skip element turns processing off; its closing tag
		// turns it back on.
		if ($is_block_tag && preg_match("#".$this->skip_elements."#", $tag[2]))
		{
			$process = ($tag[1] == '/');
		}

		if ($is_block_tag || ! $process)
		{
			$str .= $piece;
		}
		else
		{
			$str .= $this->format_newlines($piece);
		}
	}

	// FINAL CLEAN UP
	// The order of the steps below is significant; do not rearrange them.

	// Remove the paragraph tags that ended up directly around marked tags
	// adjoining block elements.
	$str = preg_replace("#<p>({@TAG}.*?)(".$block.")(.*?>)#", "\\1\\2\\3", $str);
	$str = preg_replace("#({@TAG}/.*?)(".$block.")(.*?>)</p>#", "\\1\\2\\3", $str);

	// Quotes, dashes, ellipses and friends.
	$str = $this->format_characters($str);

	// Drop paragraphs that contain nothing but newlines.
	$str = preg_replace('#(<p>\n*</p>)#', '', $str);

	// A user-supplied <p ...> tag wins over the <p> we inserted after it.
	$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

	// Last pass: collapse doubled paragraph tags, trim the padding spaces
	// inside paragraphs and restore the temporary markers. Entries are
	// applied in the order listed.
	$cleanup = array(
		'</p></p>'	=> '</p>',
		'</p><p>'	=> '<p>',
		'<p> '		=> '<p>',
		' </p>'		=> '</p>',
		'{@TAG}'	=> '<',
		'{@DQ}'		=> '"',
		'{@SQ}'		=> "'",
		'<p></p>'	=> ''
	);

	return str_replace(array_keys($cleanup), array_values($cleanup), $str);
}

| Auto_typography::format_characters | ( | $ | str | ) |
Format Characters.
This function mainly converts double and single quotes to entities, but since these are directional, it does it based on some rules. It also converts em-dashes and a couple other things.
Definition at line 297 of file typography_helper.php.
Referenced by convert().
/**
 * Format Characters.
 *
 * Converts straight double and single quotes to their directional
 * (curly) forms based on the characters around them, and also converts
 * double hyphens to em-dashes and three dots to an ellipsis.
 *
 * The passes run in a fixed order; each scanning pass locates a marker
 * with strpos(), inspects the bytes next to it, and then replaces EVERY
 * occurrence of that exact neighbour+marker+neighbour sequence in the
 * whole string via str_replace().
 *
 * NOTE(review): when a marker sits at offset 0, $start+$current-1 is -1
 * and substr() then reads the LAST byte of the string as "one before".
 * Left unchanged here; confirm whether callers rely on it.
 *
 * @param	string	$str	text to convert
 * @return	string	text with typographic characters substituted
 */
function format_characters($str)
{
	// Simple context-based substitutions, applied near the end (see below).
	// NOTE(review): the last four entries appear as identity mappings in
	// this listing; verify against the original source file.
	$table = array(
					' "'		=> " “",
					'" '		=> "” ",
					" '"		=> " ‘",
					"' "		=> "’ ",

					'>"'		=> ">“",
					'"<'		=> "”<",
					">'"		=> ">‘",
					"'<"		=> "’<",

					"\"."		=> "”.",
					"\","		=> "”,",
					"\";"		=> "”;",
					"\":"		=> "”:",
					"\"!"		=> "”!",
					"\"?"		=> "”?",

					". "		=> ". ",
					"? "		=> "? ",
					"! "		=> "! ",
					": "		=> ": ",
				);

	// Characters treated as whitespace by every pass below.
	$space = array("\n", "\t", " ");

	// Quotes within quotes, double-then-single, like: "'hi here'"
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), "\"'");

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+2, 1);

		if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
		{
			// Followed by text: this is an opening pair.
			$str = str_replace(	$one_before."\"'".$one_after,
								$one_before."“‘".$one_after,
								$str);
		}
		elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
		{
			// Preceded by text and followed by whitespace or a tag.
			$str = str_replace(	$one_before."\"'".$one_after,
								$one_before."”’".$one_after,
								$str);
		}

		$start = $start+$current+2;
	}

	// Quotes within quotes, single-then-double, like: '"hi here"'
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), "'\"");

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+2, 1);

		if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
		{
			// Preceded by whitespace and followed by text: opening pair.
			$str = str_replace(	$one_before."'\"".$one_after,
								$one_before."‘“".$one_after,
								$str);
		}
		elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
		{
			// Preceded by text (not a tag end): closing pair.
			$str = str_replace(	$one_before."'\"".$one_after,
								$one_before."’”".$one_after,
								$str);
		}

		$start = $start+$current+2;
	}

	// Are there quotes within a word, as in: ("something")
	if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
	{
		for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
		{
			if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
			{
				$str = str_replace(	$matches['0'][$i],
									$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
									$str);
			}
		}
	}

	// Same as above for single quotes, as in: ('something')
	if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
	{
		for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
		{
			if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
			{
				$str = str_replace(	$matches['0'][$i],
									$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
									$str);
			}
		}
	}

	// How about one apostrophe, as in Rick's
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), "'");

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+1, 1);

		// Only an apostrophe with non-whitespace on both sides qualifies.
		if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
		{
			$str = str_replace(	$one_before."'".$one_after,
								$one_before."’".$one_after,
								$str);
		}

		$start = $start+$current+2;
	}

	// Em-dashes
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), "--");

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+2, 1);
		$two_before = substr($str, $start+$current-2, 1);
		$two_after = substr($str, $start+$current+3, 1);

		// Either "word--word", or "word -- word" with exactly one space on
		// each side; in the second form trim() drops the spaces.
		if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			OR
			( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
			)
		{
			$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
								$two_before.trim($one_before)."—".trim($one_after).$two_after,
								$str);
		}

		$start = $start+$current+2;
	}

	// Ellipsis: three dots between a word character and whitespace or a
	// <br /> / </p> tag, in either direction.
	$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1…\\2", $str);
	$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1…\\2", $str);

	// Run the translation array we defined above
	$str = str_replace(array_keys($table), array_values($table), $str);

	// If there are any stray double quotes we'll catch them here
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), '"');

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+1, 1);

		if ( ! in_array($one_after, $space, TRUE))
		{
			// Followed by text: opening quote.
			$str = str_replace(	$one_before.'"'.$one_after,
								$one_before."“".$one_after,
								$str);
		}
		elseif ( ! in_array($one_before, $space, TRUE))
		{
			// Preceded by text, followed by whitespace: closing quote.
			// The needle must contain the double quote we just found; the
			// previous code searched for an apostrophe here, which left
			// the quote unconverted and could rewrite unrelated apostrophes.
			$str = str_replace(	$one_before.'"'.$one_after,
								$one_before."”".$one_after,
								$str);
		}

		$start = $start+$current+2;
	}

	// ...and any stray single quotes
	$start = 0;

	while (TRUE)
	{
		$current = strpos(substr($str, $start), "'");

		if ($current === FALSE) break;

		$one_before = substr($str, $start+$current-1, 1);
		$one_after = substr($str, $start+$current+1, 1);

		if ( ! in_array($one_after, $space, TRUE))
		{
			// Followed by text: opening quote.
			$str = str_replace(	$one_before."'".$one_after,
								$one_before."‘".$one_after,
								$str);
		}
		elseif ( ! in_array($one_before, $space, TRUE))
		{
			// Preceded by text, followed by whitespace: closing quote.
			$str = str_replace(	$one_before."'".$one_after,
								$one_before."’".$one_after,
								$str);
		}

		$start = $start+$current+2;
	}

	return $str;
}

| Auto_typography::format_newlines | ( | $ | str | ) |
Format Newlines.
Converts newline characters into either <p> tags or <br /> tags.
Definition at line 528 of file typography_helper.php.
Referenced by convert().
/**
 * Format Newlines.
 *
 * Wraps the text in <p>...</p>. A double newline becomes a paragraph
 * break ("</p>\n\n<p>") and a single newline between two non-newline
 * characters gets a "<br />" inserted in front of it.
 *
 * @param	string	$str	text chunk to wrap
 * @return	string	the wrapped text; $str itself when it compares equal to ''
 */
function format_newlines($str)
{
	if ($str == '')
	{
		return $str;
	}

	$body = $str;

	// Text without any newline only needs the outer paragraph tags.
	if (strpos($str, "\n") !== FALSE)
	{
		// Paragraph breaks first, so the single-newline rule below cannot
		// touch the blank line between paragraphs.
		$body = str_replace("\n\n", "</p>\n\n<p>", $body);
		$body = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $body);
	}

	return '<p>'.$body.'</p>';
}

| Auto_typography::$block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul' |
Definition at line 97 of file typography_helper.php.
| Auto_typography::$ignore_elements = 'a|b|i|em|strong|span|img|li' |
Definition at line 103 of file typography_helper.php.
| Auto_typography::$skip_elements = 'pre|ol|ul' |
Definition at line 100 of file typography_helper.php.