mirror of
https://github.com/the-djmaze/snappymail.git
synced 2025-02-25 07:16:21 +08:00
Improved HTML UTF-8 handling
This commit is contained in:
parent
9218cc4785
commit
bc986f323c
3 changed files with 26 additions and 86 deletions
|
@ -91,27 +91,17 @@ abstract class HtmlUtils
|
||||||
$sResult = '';
|
$sResult = '';
|
||||||
if ($oElem instanceof \DOMDocument)
|
if ($oElem instanceof \DOMDocument)
|
||||||
{
|
{
|
||||||
if (isset($oElem->documentElement))
|
$sResult = $oElem->saveHTML(isset($oElem->documentElement) ? $oElem->documentElement : null);
|
||||||
{
|
|
||||||
$sResult = $oElem->saveHTML($oElem->documentElement);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$sResult = $oElem->saveHTML();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if ($oElem)
|
else if ($oDom)
|
||||||
{
|
{
|
||||||
if ($oDom)
|
$sResult = $oDom->saveHTML($oElem);
|
||||||
{
|
}
|
||||||
$sResult = $oDom->saveHTML($oElem);
|
else
|
||||||
}
|
{
|
||||||
else
|
$oTempDoc = self::createDOMDocument();
|
||||||
{
|
$oTempDoc->appendChild($oTempDoc->importNode($oElem->cloneNode(true), true));
|
||||||
$oTempDoc = self::createDOMDocument();
|
$sResult = $oTempDoc->saveHTML();
|
||||||
$oTempDoc->appendChild($oTempDoc->importNode($oElem->cloneNode(true), true));
|
|
||||||
$sResult = $oTempDoc->saveHTML();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return \trim($sResult);
|
return \trim($sResult);
|
||||||
|
@ -188,8 +178,8 @@ abstract class HtmlUtils
|
||||||
$sBodyAttrs = \preg_replace('/xmlns:[a-z]="[^"]*"/i', '', $sBodyAttrs);
|
$sBodyAttrs = \preg_replace('/xmlns:[a-z]="[^"]*"/i', '', $sBodyAttrs);
|
||||||
$sBodyAttrs = \preg_replace('/xmlns:[a-z]=\'[^\']*\'/i', '', $sBodyAttrs);
|
$sBodyAttrs = \preg_replace('/xmlns:[a-z]=\'[^\']*\'/i', '', $sBodyAttrs);
|
||||||
|
|
||||||
$sHtmlAttrs = trim($sHtmlAttrs);
|
$sHtmlAttrs = \trim($sHtmlAttrs);
|
||||||
$sBodyAttrs = trim($sBodyAttrs);
|
$sBodyAttrs = \trim($sBodyAttrs);
|
||||||
|
|
||||||
return $sHtml;
|
return $sHtml;
|
||||||
}
|
}
|
||||||
|
@ -1078,8 +1068,8 @@ abstract class HtmlUtils
|
||||||
|
|
||||||
$sText = \strtr($sText, array(
|
$sText = \strtr($sText, array(
|
||||||
"\n" => "<br />",
|
"\n" => "<br />",
|
||||||
"\t" => '&nbsp;&nbsp;&nbsp;&nbsp;',
|
"\t" => "\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0",
|
||||||
'  ' => '&nbsp;&nbsp;'
|
'  ' => "\xC2\xA0\xC2\xA0"
|
||||||
));
|
));
|
||||||
|
|
||||||
return $sText;
|
return $sText;
|
||||||
|
@ -1089,20 +1079,20 @@ abstract class HtmlUtils
|
||||||
{
|
{
|
||||||
$sText = \MailSo\Base\Utils::StripSpaces($sText);
|
$sText = \MailSo\Base\Utils::StripSpaces($sText);
|
||||||
|
|
||||||
|
$sText = \preg_replace_callback('/<h([1-6])[^>]*>/', function($m) {
|
||||||
|
return "\n\n" . \str_repeat('#', $m[1]) . ' ';
|
||||||
|
}, $sText);
|
||||||
|
|
||||||
$sText = \preg_replace(array(
|
$sText = \preg_replace(array(
|
||||||
"/\r/",
|
"/\r/",
|
||||||
"/[\n\t]+/",
|
"/[\n\t]+/",
|
||||||
'/<script[^>]*>.*?<\/script>/i',
|
'/<script[^>]*>.*?<\/script>|<style[^>]*>.*?<\/style>|<title[^>]*>.*?<\/title>/i',
|
||||||
'/<style[^>]*>.*?<\/style>/i',
|
'/<\/h[1-6]>/i',
|
||||||
'/<title[^>]*>.*?<\/title>/i',
|
|
||||||
'/<h[123][^>]*>(.+?)<\/h[123]>/i',
|
|
||||||
'/<h[456][^>]*>(.+?)<\/h[456]>/i',
|
|
||||||
'/<p[^>]*>/i',
|
'/<p[^>]*>/i',
|
||||||
'/<br[^>]*>/i',
|
'/<br[^>]*>/i',
|
||||||
'/<b[^>]*>(.+?)<\/b>/i',
|
'/<b[^>]*>(.+?)<\/b>/i',
|
||||||
'/<i[^>]*>(.+?)<\/i>/i',
|
'/<i[^>]*>(.+?)<\/i>/i',
|
||||||
'/(<ul[^>]*>|<\/ul>)/i',
|
'/<ul[^>]*>|<\/ul>|<ol[^>]*>|<\/ol>/i',
|
||||||
'/(<ol[^>]*>|<\/ol>)/i',
|
|
||||||
'/<li[^>]*>/i',
|
'/<li[^>]*>/i',
|
||||||
'/<a[^>]*href="([^"]+)"[^>]*>(.+?)<\/a>/i',
|
'/<a[^>]*href="([^"]+)"[^>]*>(.+?)<\/a>/i',
|
||||||
'/<hr[^>]*>/i',
|
'/<hr[^>]*>/i',
|
||||||
|
@ -1110,69 +1100,23 @@ abstract class HtmlUtils
|
||||||
'/(<tr[^>]*>|<\/tr>)/i',
|
'/(<tr[^>]*>|<\/tr>)/i',
|
||||||
'/<td[^>]*>(.+?)<\/td>/i',
|
'/<td[^>]*>(.+?)<\/td>/i',
|
||||||
'/<th[^>]*>(.+?)<\/th>/i',
|
'/<th[^>]*>(.+?)<\/th>/i',
|
||||||
'/&nbsp;/i',
|
|
||||||
'/&quot;/i',
|
|
||||||
'/&amp;/i',
|
|
||||||
'/&copy;/i',
|
|
||||||
'/&trade;/i',
|
|
||||||
'/&#8220;/',
|
|
||||||
'/&#8221;/',
|
|
||||||
'/&#8211;/',
|
|
||||||
'/&#8217;/',
|
|
||||||
'/&#38;/',
|
|
||||||
'/&#169;/',
|
|
||||||
'/&#8482;/',
|
|
||||||
'/&#151;/',
|
|
||||||
'/&#147;/',
|
|
||||||
'/&#148;/',
|
|
||||||
'/&#149;/',
|
|
||||||
'/&reg;/i',
|
|
||||||
'/&bull;/i',
|
|
||||||
'/&[&;]+;/i',
|
|
||||||
'/&#39;/',
|
|
||||||
'/&#160;/'
|
|
||||||
), array(
|
), array(
|
||||||
'',
|
'',
|
||||||
' ',
|
' ',
|
||||||
'',
|
'',
|
||||||
'',
|
"\n\n",
|
||||||
'',
|
|
||||||
"\n\n\\1\n\n",
|
|
||||||
"\n\n\\1\n\n",
|
|
||||||
"\n\n\t",
|
"\n\n\t",
|
||||||
"\n",
|
"\n",
|
||||||
'\\1',
|
'\\1',
|
||||||
'\\1',
|
'\\1',
|
||||||
"\n\n",
|
"\n\n",
|
||||||
"\n\n",
|
|
||||||
"\n\t* ",
|
"\n\t* ",
|
||||||
'\\2 (\\1)',
|
'\\2 (\\1)',
|
||||||
"\n------------------------------------\n",
|
"\n------------------------------------\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\t\\1\n",
|
"\t\\1\n",
|
||||||
"\t\\1\n",
|
"\t\\1\n"
|
||||||
' ',
|
|
||||||
'"',
|
|
||||||
'&',
|
|
||||||
'(c)',
|
|
||||||
'(tm)',
|
|
||||||
'"',
|
|
||||||
'"',
|
|
||||||
'-',
|
|
||||||
"'",
|
|
||||||
'&',
|
|
||||||
'(c)',
|
|
||||||
'(tm)',
|
|
||||||
'--',
|
|
||||||
'"',
|
|
||||||
'"',
|
|
||||||
'*',
|
|
||||||
'(R)',
|
|
||||||
'*',
|
|
||||||
'',
|
|
||||||
'\'',
|
|
||||||
''
|
|
||||||
), $sText);
|
), $sText);
|
||||||
|
|
||||||
$sText = \str_ireplace('<div>',"\n<div>", $sText);
|
$sText = \str_ireplace('<div>',"\n<div>", $sText);
|
||||||
|
@ -1180,13 +1124,7 @@ abstract class HtmlUtils
|
||||||
$sText = \preg_replace("/\n\\s+\n/", "\n", $sText);
|
$sText = \preg_replace("/\n\\s+\n/", "\n", $sText);
|
||||||
$sText = \preg_replace("/[\n]{3,}/", "\n\n", $sText);
|
$sText = \preg_replace("/[\n]{3,}/", "\n\n", $sText);
|
||||||
|
|
||||||
$sText = \preg_replace(array(
|
$sText = \html_entity_decode($sText, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
|
||||||
'/&gt;/i',
|
|
||||||
'/&lt;/i'
|
|
||||||
), array(
|
|
||||||
'>',
|
|
||||||
'<'
|
|
||||||
), $sText);
|
|
||||||
|
|
||||||
return \trim($sText);
|
return \trim($sText);
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,7 +133,7 @@ class Utils
|
||||||
// return $sHtml;
|
// return $sHtml;
|
||||||
return \preg_replace(
|
return \preg_replace(
|
||||||
['@"\\s*/>@', '/\\s*&nbsp;/i', '/&nbsp;\\s*/i', '/[\\r\\n\\t]+/', '/>\\s+</'],
|
['@"\\s*/>@', '/\\s*&nbsp;/i', '/&nbsp;\\s*/i', '/[\\r\\n\\t]+/', '/>\\s+</'],
|
||||||
['">', '&nbsp;', '&nbsp;', ' ', '><'],
|
['">', "\xC2\xA0", "\xC2\xA0", ' ', '><'],
|
||||||
\trim($sHtml)
|
\trim($sHtml)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -146,6 +146,8 @@ if (defined('APP_VERSION'))
|
||||||
|
|
||||||
define('APP_PLUGINS_PATH', APP_PRIVATE_DATA.'plugins/');
|
define('APP_PLUGINS_PATH', APP_PRIVATE_DATA.'plugins/');
|
||||||
|
|
||||||
|
ini_set('default_charset', 'UTF-8');
|
||||||
|
ini_set('internal_encoding', 'UTF-8');
|
||||||
mb_internal_encoding('UTF-8');
|
mb_internal_encoding('UTF-8');
|
||||||
mb_language('uni');
|
mb_language('uni');
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue