fork download
  1. <?php
  2.  
  3. // your code goes here
  4. $text = '<strong>&amp;foo < FOO<em class="bar">bar</em></strong>';
  5.  
  6. $document = <<<EOD
  7. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://w...content-available-to-author-only...3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  8. <html xmlns="http://w...content-available-to-author-only...3.org/1999/xhtml">
  9. <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
  10. <body>!html</body>
  11. </html>
  12. EOD;
  13. // PHP's \DOMDocument serialization adds straw whitespace in case the markup
  14. // of the wrapping document contains newlines, so ensure to remove all
  15. // newlines before injecting the actual HTML body to process.
  16. $document = strtr($document, array("\n" => '', '!html' => $text));
  17.  
  18. $dom = new \DOMDocument();
  19. // Ignore warnings during HTML soup loading.
  20. @$dom->loadHTML($document);
  21.  
  22. $body_node = $dom->getElementsByTagName('body')->item(0);
  23. $html = '';
  24.  
  25. foreach ($body_node->getElementsByTagName('script') as $node) {
  26. static::escapeCdataElement($node);
  27. }
  28. foreach ($body_node->getElementsByTagName('style') as $node) {
  29. static::escapeCdataElement($node, '/*', '*/');
  30. }
  31. foreach ($body_node->childNodes as $node) {
  32. $html .= $dom->saveXML($node);
  33. }
  34.  
  35. $text = $html;
  36.  
  37. preg_match_all('/<[^>]++>|[^<>\s]++/', $text, $tokens);
  38.  
  39. print_r($tokens);
  40.  
  41. $counter = 0;
  42. $maxlength = 5;
  43. $newtext = array();
  44. foreach ($tokens[0] as $token) {
  45. if (mb_substr($token, 0, 1, 'utf-8') === '<') {
  46. $newtext[] = $token;
  47. continue;
  48. }
  49. $counter += strlen(html_entity_decode($token));
  50. if ($counter > $maxlength) {
  51. break;
  52. }
  53. $newtext[] = $token;
  54. }
  55. $newtext = implode('', $newtext);
  56. print_r($tokens);
  57. print_r($newtext);
Success #stdin #stdout 0.01s 20568KB
stdin
Standard input is empty
stdout
Array
(
    [0] => Array
        (
            [0] => <strong>
            [1] => &amp;foo
            [2] => bar
            [3] => </strong>
        )

)
Array
(
    [0] => Array
        (
            [0] => <strong>
            [1] => &amp;foo
            [2] => bar
            [3] => </strong>
        )

)
<strong>&amp;foo