mirror of
https://github.com/the-djmaze/snappymail.git
synced 2025-02-24 23:08:08 +08:00
Improved HTML UTF-8 handling
This commit is contained in:
parent
9218cc4785
commit
bc986f323c
3 changed files with 26 additions and 86 deletions
|
@ -91,27 +91,17 @@ abstract class HtmlUtils
|
|||
$sResult = '';
|
||||
if ($oElem instanceof \DOMDocument)
|
||||
{
|
||||
if (isset($oElem->documentElement))
|
||||
{
|
||||
$sResult = $oElem->saveHTML($oElem->documentElement);
|
||||
}
|
||||
else
|
||||
{
|
||||
$sResult = $oElem->saveHTML();
|
||||
}
|
||||
$sResult = $oElem->saveHTML(isset($oElem->documentElement) ? $oElem->documentElement : null);
|
||||
}
|
||||
else if ($oElem)
|
||||
else if ($oDom)
|
||||
{
|
||||
if ($oDom)
|
||||
{
|
||||
$sResult = $oDom->saveHTML($oElem);
|
||||
}
|
||||
else
|
||||
{
|
||||
$oTempDoc = self::createDOMDocument();
|
||||
$oTempDoc->appendChild($oTempDoc->importNode($oElem->cloneNode(true), true));
|
||||
$sResult = $oTempDoc->saveHTML();
|
||||
}
|
||||
$sResult = $oDom->saveHTML($oElem);
|
||||
}
|
||||
else
|
||||
{
|
||||
$oTempDoc = self::createDOMDocument();
|
||||
$oTempDoc->appendChild($oTempDoc->importNode($oElem->cloneNode(true), true));
|
||||
$sResult = $oTempDoc->saveHTML();
|
||||
}
|
||||
|
||||
return \trim($sResult);
|
||||
|
@ -188,8 +178,8 @@ abstract class HtmlUtils
|
|||
$sBodyAttrs = \preg_replace('/xmlns:[a-z]="[^"]*"/i', '', $sBodyAttrs);
|
||||
$sBodyAttrs = \preg_replace('/xmlns:[a-z]=\'[^\']*\'/i', '', $sBodyAttrs);
|
||||
|
||||
$sHtmlAttrs = trim($sHtmlAttrs);
|
||||
$sBodyAttrs = trim($sBodyAttrs);
|
||||
$sHtmlAttrs = \trim($sHtmlAttrs);
|
||||
$sBodyAttrs = \trim($sBodyAttrs);
|
||||
|
||||
return $sHtml;
|
||||
}
|
||||
|
@ -1078,8 +1068,8 @@ abstract class HtmlUtils
|
|||
|
||||
$sText = \strtr($sText, array(
|
||||
"\n" => "<br />",
|
||||
"\t" => ' ',
|
||||
' ' => ' '
|
||||
"\t" => "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0",
|
||||
' ' => "\xC2\xA0\xC2\xA0"
|
||||
));
|
||||
|
||||
return $sText;
|
||||
|
@ -1089,20 +1079,20 @@ abstract class HtmlUtils
|
|||
{
|
||||
$sText = \MailSo\Base\Utils::StripSpaces($sText);
|
||||
|
||||
$sText = \preg_replace_callback('/<h([1-6])[^>]*>/', function($m) {
|
||||
return "\n\n" . \str_repeat('#', $m[1]) . ' ';
|
||||
}, $sText);
|
||||
|
||||
$sText = \preg_replace(array(
|
||||
"/\r/",
|
||||
"/[\n\t]+/",
|
||||
'/<script[^>]*>.*?<\/script>/i',
|
||||
'/<style[^>]*>.*?<\/style>/i',
|
||||
'/<title[^>]*>.*?<\/title>/i',
|
||||
'/<h[123][^>]*>(.+?)<\/h[123]>/i',
|
||||
'/<h[456][^>]*>(.+?)<\/h[456]>/i',
|
||||
'/<script[^>]*>.*?<\/script>|<style[^>]*>.*?<\/style>|<title[^>]*>.*?<\/title>/i',
|
||||
'/<\/h[1-6]>/i',
|
||||
'/<p[^>]*>/i',
|
||||
'/<br[^>]*>/i',
|
||||
'/<b[^>]*>(.+?)<\/b>/i',
|
||||
'/<i[^>]*>(.+?)<\/i>/i',
|
||||
'/(<ul[^>]*>|<\/ul>)/i',
|
||||
'/(<ol[^>]*>|<\/ol>)/i',
|
||||
'/<ul[^>]*>|<\/ul>|<ol[^>]*>|<\/ol>/i',
|
||||
'/<li[^>]*>/i',
|
||||
'/<a[^>]*href="([^"]+)"[^>]*>(.+?)<\/a>/i',
|
||||
'/<hr[^>]*>/i',
|
||||
|
@ -1110,69 +1100,23 @@ abstract class HtmlUtils
|
|||
'/(<tr[^>]*>|<\/tr>)/i',
|
||||
'/<td[^>]*>(.+?)<\/td>/i',
|
||||
'/<th[^>]*>(.+?)<\/th>/i',
|
||||
'/ /i',
|
||||
'/"/i',
|
||||
'/&/i',
|
||||
'/©/i',
|
||||
'/™/i',
|
||||
'/“/',
|
||||
'/”/',
|
||||
'/–/',
|
||||
'/’/',
|
||||
'/&/',
|
||||
'/©/',
|
||||
'/™/',
|
||||
'/—/',
|
||||
'/“/',
|
||||
'/”/',
|
||||
'/•/',
|
||||
'/®/i',
|
||||
'/•/i',
|
||||
'/&[&;]+;/i',
|
||||
'/'/',
|
||||
'/ /'
|
||||
), array(
|
||||
'',
|
||||
' ',
|
||||
'',
|
||||
'',
|
||||
'',
|
||||
"\n\n\\1\n\n",
|
||||
"\n\n\\1\n\n",
|
||||
"\n\n",
|
||||
"\n\n\t",
|
||||
"\n",
|
||||
'\\1',
|
||||
'\\1',
|
||||
"\n\n",
|
||||
"\n\n",
|
||||
"\n\t* ",
|
||||
'\\2 (\\1)',
|
||||
"\n------------------------------------\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\t\\1\n",
|
||||
"\t\\1\n",
|
||||
' ',
|
||||
'"',
|
||||
'&',
|
||||
'(c)',
|
||||
'(tm)',
|
||||
'"',
|
||||
'"',
|
||||
'-',
|
||||
"'",
|
||||
'&',
|
||||
'(c)',
|
||||
'(tm)',
|
||||
'--',
|
||||
'"',
|
||||
'"',
|
||||
'*',
|
||||
'(R)',
|
||||
'*',
|
||||
'',
|
||||
'\'',
|
||||
''
|
||||
"\t\\1\n"
|
||||
), $sText);
|
||||
|
||||
$sText = \str_ireplace('<div>',"\n<div>", $sText);
|
||||
|
@ -1180,13 +1124,7 @@ abstract class HtmlUtils
|
|||
$sText = \preg_replace("/\n\\s+\n/", "\n", $sText);
|
||||
$sText = \preg_replace("/[\n]{3,}/", "\n\n", $sText);
|
||||
|
||||
$sText = \preg_replace(array(
|
||||
'/>/i',
|
||||
'/</i'
|
||||
), array(
|
||||
'>',
|
||||
'<'
|
||||
), $sText);
|
||||
$sText = \html_entity_decode($sText, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
|
||||
|
||||
return \trim($sText);
|
||||
}
|
||||
|
|
|
@ -133,7 +133,7 @@ class Utils
|
|||
// return $sHtml;
|
||||
return \preg_replace(
|
||||
['@"\\s*/>@', '/\\s* /i', '/ \\s*/i', '/[\\r\\n\\t]+/', '/>\\s+</'],
|
||||
['">', ' ', ' ', ' ', '><'],
|
||||
['">', "\xC2\xA0", "\xC2\xA0", ' ', '><'],
|
||||
\trim($sHtml)
|
||||
);
|
||||
}
|
||||
|
|
|
@ -146,6 +146,8 @@ if (defined('APP_VERSION'))
|
|||
|
||||
define('APP_PLUGINS_PATH', APP_PRIVATE_DATA.'plugins/');
|
||||
|
||||
ini_set('default_charset', 'UTF-8');
|
||||
ini_set('internal_encoding', 'UTF-8');
|
||||
mb_internal_encoding('UTF-8');
|
||||
mb_language('uni');
|
||||
|
||||
|
|
Loading…
Reference in a new issue