encoding = 'UTF-8'; @$oDom->loadHTML( '<'.'?xml version="1.0" encoding="utf-8"?'.'>'. '
'.$sText.'' ); return $oDom; }

    /**
     * Removes the opening and closing <html> and <body> tags from a markup string
     * and reports the attribute text that the opening tags carried.
     *
     * The by-reference arguments are only overwritten when the corresponding
     * opening tag is present with a non-empty attribute part; otherwise they
     * keep whatever value the caller passed in.
     *
     * @param string $sHtml
     * @param string $sHtmlAttrs = '' receives the raw text between "<html" and ">"
     * @param string $sBodyAttrs = '' receives the raw text between "<body" and ">"
     *
     * @return string markup without the html/body wrapper tags
     */
    public static function ClearBodyAndHtmlTag($sHtml, &$sHtmlAttrs = '', &$sBodyAttrs = '')
    {
        $aMatch = array();
        if (\preg_match('/<html([^>]+)>/im', $sHtml, $aMatch) && !empty($aMatch[1]))
        {
            $sHtmlAttrs = $aMatch[1];
        }

        $aMatch = array();
        if (\preg_match('/<body([^>]+)>/im', $sHtml, $aMatch) && !empty($aMatch[1]))
        {
            $sBodyAttrs = $aMatch[1];
        }

        // Body tags are stripped before html tags, closing tag right after its opening counterpart.
        $sHtml = \preg_replace('/<body([^>]*)>/im', '', $sHtml);
        $sHtml = \preg_replace('/<\/body>/im', '', $sHtml);
        $sHtml = \preg_replace('/<html([^>]*)>/im', '', $sHtml);
        $sHtml = \preg_replace('/<\/html>/im', '', $sHtml);

        return $sHtml;
    }

    /**
     * Deletes the doctype declaration, the XML prolog and a fixed list of
     * unwanted elements (head, script, style, iframe, ...) from a markup string.
     *
     * For every listed tag three passes are made: the paired element together
     * with its content, then any remaining opening tag, then any remaining
     * closing tag. The tag name is followed by a word boundary so that longer
     * names sharing a prefix (e.g. <header> vs. head) are left alone.
     *
     * @param string $sHtml
     *
     * @return string
     */
    public static function ClearTags($sHtml)
    {
        $aRemoveTags = array(
            'head', 'link', 'base', 'meta', 'title', 'style', 'script', 'bgsound',
            'object', 'embed', 'applet', 'mocha', 'iframe', 'frame', 'frameset'
        );

        $aToRemove = array(
            '/<!doctype[^>]*>/msi',
            '/<\?xml [^>]*\?>/msi'
        );

        foreach ($aRemoveTags as $sTag)
        {
            // The single quote is used as the regex delimiter, so "/" needs no escaping here.
            $aToRemove[] = '\'<'.$sTag.'\b[^>]*>.*?</[\s]*'.$sTag.'>\'msi';
            $aToRemove[] = '\'<'.$sTag.'\b[^>]*>\'msi';
            $aToRemove[] = '\'</[\s]*'.$sTag.'\b[^>]*>\'msi';
        }

        return \preg_replace($aToRemove, '', $sHtml);
    }

    /**
     * Defuses inline event-handler names (onclick, onload, ...) wherever they
     * occur in the string.
     *
     * Each listed name is matched case-insensitively and without anchors, and
     * its leading Latin "o" is swapped for the look-alike Cyrillic letter
     * U+043E; the rest of the name keeps its original spelling and case.
     *
     * @param string $sHtml
     *
     * @return string
     */
    public static function ClearOn($sHtml)
    {
        $aEventNames = array(
            'Blur', 'Change', 'Click', 'DblClick', 'Error', 'Focus',
            'KeyDown', 'KeyPress', 'KeyUp', 'Load',
            'MouseDown', 'MouseEnter', 'MouseLeave', 'MouseMove', 'MouseOut', 'MouseOver', 'MouseUp',
            'Move', 'Resize', 'ResizeEnd', 'ResizeStart', 'Scroll', 'Select', 'Submit', 'Unload'
        );

        $aToReplace = array();
        foreach ($aEventNames as $sEventName)
        {
            $aToReplace[] = '/on('.$sEventName.')/si';
        }

        // NOTE: the first character of the replacement is NOT an ASCII "o" - do not "fix" it.
        return \preg_replace($aToReplace, 'оn\\1', $sHtml);
    }

    /**
     *
     * @param string $sStyle
     * @param \DOMElement $oElement
     * @param bool $bHasExternals
     * @param array $aFoundCIDs
     * @param 
array $aContentLocationUrls
     * @param array $aFoundedContentLocationUrls
     * @param bool $bDoNotReplaceExternalUrl = false
     *
     * @return string kept declarations, each as "name: value", joined with ";"
     *
     * Filters the declarations of an inline style value one by one:
     * "position: fixed" becomes "position: absolute"; behavior, cursor,
     * "display: none", values containing "expression" and negative text-indent
     * are dropped; url(...) references in background / background-image /
     * list-style-image / content are cut out of the value and, when an element
     * is given, recorded on it through data-x-* attributes.
     */
    public static function ClearStyle($sStyle, $oElement, &$bHasExternals, &$aFoundCIDs, $aContentLocationUrls, &$aFoundedContentLocationUrls, $bDoNotReplaceExternalUrl = false)
    {
        $aKept = array();
        $aUrlMatch = array();

        foreach (\explode(';', \trim($sStyle)) as $sDeclaration)
        {
            $aParts = \explode(':', $sDeclaration, 2);
            $sName = \trim(\strtolower($aParts[0]));
            $sValue = isset($aParts[1]) ? \trim($aParts[1]) : '';

            if ('position' === $sName && 'fixed' === \strtolower($sValue))
            {
                $sValue = 'absolute';
            }

            // Declarations lacking a name or a value are ignored.
            if ('' === $sName || '' === $sValue)
            {
                continue;
            }

            $bForbidden = \in_array($sName, array('behavior', 'cursor'))
                || ('display' === $sName && 'none' === \strtolower($sValue))
                || \preg_match('/expression/i', $sValue)
                || ('text-indent' === $sName && '-' === \substr($sValue, 0, 1));

            if ($bForbidden)
            {
                continue;
            }

            $sDeclaration = $sName.': '.$sValue;

            $bCarriesUrl = \in_array($sName, array('background-image', 'background', 'list-style-image', 'content'))
                && \preg_match('/url[\s]?\(([^)]+)\)/im', $sValue, $aUrlMatch)
                && !empty($aUrlMatch[1]);

            if (!$bCarriesUrl)
            {
                $aKept[] = $sDeclaration;
                continue;
            }

            $sFullUrl = \trim($aUrlMatch[0], '"\' ');
            $sUrl = \trim($aUrlMatch[1], '"\' ');

            // The url(...) token is removed from the value; whatever remains is what may be kept.
            $sRemainder = \trim(\preg_replace('/[\s]+/', ' ', \str_replace($sFullUrl, '', $sValue)));
            $sDeclaration = empty($sRemainder) ? '' : $sName.': '.$sRemainder;

            if ($oElement)
            {
                // The "background" shorthand is recorded under "background-image".
                $sRecordedName = 'background' === $sName ? 'background-image' : $sName;

                if ('cid:' === \strtolower(\substr($sUrl, 0, 4)))
                {
                    $sCid = \substr($sUrl, 4);
                    $oElement->setAttribute('data-x-style-cid-name', $sRecordedName);
                    $oElement->setAttribute('data-x-style-cid', $sCid);
                    $aFoundCIDs[] = $sCid;
                }
                else if (\preg_match('/http[s]?:\/\//i', $sUrl))
                {
                    $bHasExternals = true;

                    if (!$bDoNotReplaceExternalUrl)
                    {
                        if (\in_array($sName, array('background-image', 'list-style-image', 'content')))
                        {
                            $sDeclaration = '';
                        }

                        $sPending = $oElement->hasAttribute('data-x-style-url')
                            ? \trim($oElement->getAttribute('data-x-style-url')) : '';

                        if (empty($sPending))
                        {
                            $sPending = '';
                        }
                        else
                        {
                            $sPending .= ';' === \substr($sPending, -1) ? ' ' : '; ';
                        }

                        $oElement->setAttribute('data-x-style-url',
                            \trim($sPending.$sRecordedName.': '.$sFullUrl, ' ;'));
                    }
                }
                else if ('data:image/' !== \strtolower(\substr(\trim($sUrl), 0, 11)))
                {
                    $oElement->setAttribute('data-x-broken-style-src', $sFullUrl);
                }
            }

            if (!empty($sDeclaration))
            {
                $aKept[] = $sDeclaration;
            }
        }

        return \implode(';', $aKept);
    }

    /**
     * Shortcut for ClearHtml() for callers that do not need the by-reference
     * results: throw-away variables are passed for them and an empty list is
     * used for the content-location urls.
     *
     * @param string $sHtml
     * @param bool $bDoNotReplaceExternalUrl = false
     *
     * @return string
     */
    public static function ClearHtmlSimple($sHtml, $bDoNotReplaceExternalUrl = false)
    {
        $bUnusedExternals = false;
        $aUnusedCids = array();
        $aNoLocationUrls = array();
        $aUnusedLocations = array();

        return \MailSo\Base\HtmlUtils::ClearHtml($sHtml, $bUnusedExternals, $aUnusedCids,
            $aNoLocationUrls, $aUnusedLocations, $bDoNotReplaceExternalUrl);
    }

    /**
     * @param string $sHtml
     * @param bool $bHasExternals = false
     * @param array $aFoundCIDs = array()
     * @param array $aContentLocationUrls = array()
     * @param array $aFoundedContentLocationUrls = array()
     * @param bool $bDoNotReplaceExternalUrl = false
     *
     * @return string
     */
    public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(), $aContentLocationUrls = array(), &$aFoundedContentLocationUrls = 
array(), $bDoNotReplaceExternalUrl = false) { $sHtml = null === $sHtml ? '' : (string) $sHtml; $sHtml = \trim($sHtml); if (0 === \strlen($sHtml)) { return ''; } $bHasExternals = false; $sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml); $sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml); $sHtmlAttrs = $sBodyAttrs = ''; $sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml, $sHtmlAttrs, $sBodyAttrs); // Dom Part $oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml, $sHtmlAttrs, $sBodyAttrs); unset($sHtml); if ($oDom) { $aNodes = $oDom->getElementsByTagName('*'); foreach ($aNodes as /* @var $oElement \DOMElement */ $oElement) { $sTagNameLower = \strtolower($oElement->tagName); // convert body attributes to styles if ('body' === $sTagNameLower) { $aAttrs = array( 'text' => '', 'topmargin' => '', 'leftmargin' => '', 'bottommargin' => '', 'rightmargin' => '' ); if (isset($oElement->attributes)) { foreach ($oElement->attributes as $sAttributeName => /* @var $oAttributeNode \DOMNode */ $oAttributeNode) { if ($oAttributeNode && isset($oAttributeNode->nodeValue)) { $sAttributeNameLower = \strtolower($sAttributeName); if (isset($aAttrs[$sAttributeNameLower]) && '' === $aAttrs[$sAttributeNameLower]) { $aAttrs[$sAttributeNameLower] = array($sAttributeName, \trim($oAttributeNode->nodeValue)); } } } } $aStyles = array(); foreach ($aAttrs as $sIndex => $aItem) { if (\is_array($aItem)) { $oElement->removeAttribute($aItem[0]); switch ($sIndex) { case 'text': $aStyles[] = 'color: '.$aItem[1]; break; case 'topmargin': $aStyles[] = 'margin-top: '.((int) $aItem[1]).'px'; break; case 'leftmargin': $aStyles[] = 'margin-left: '.((int) $aItem[1]).'px'; break; case 'bottommargin': $aStyles[] = 'margin-bottom: '.((int) $aItem[1]).'px'; break; case 'rightmargin': $aStyles[] = 'margin-right: '.((int) $aItem[1]).'px'; break; } } } if (0 < \count($aStyles)) { $sStyles = $oElement->hasAttribute('style') ? 
$oElement->getAttribute('style') : ''; $oElement->setAttribute('style', (empty($sStyles) ? '' : $sStyles.'; ').\implode('; ', $aStyles)); } } if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower) { $oElement->setAttribute('src', 'javascript:false'); } if (\in_array($sTagNameLower, array('a', 'form', 'area'))) { $oElement->setAttribute('target', '_blank'); } if (\in_array($sTagNameLower, array('a', 'form', 'area', 'input', 'button', 'textarea'))) { $oElement->setAttribute('tabindex', '-1'); } // if ('blockquote' === $sTagNameLower) // { // $oElement->removeAttribute('style'); // } @$oElement->removeAttribute('id'); @$oElement->removeAttribute('class'); @$oElement->removeAttribute('contenteditable'); @$oElement->removeAttribute('designmode'); @$oElement->removeAttribute('data-bind'); @$oElement->removeAttribute('xmlns'); if ($oElement->hasAttribute('src')) { $sSrc = \trim($oElement->getAttribute('src')); $oElement->removeAttribute('src'); if (\in_array($sSrc, $aContentLocationUrls)) { $oElement->setAttribute('data-x-src-location', $sSrc); $aFoundedContentLocationUrls[] = $sSrc; } else if ('cid:' === \strtolower(\substr($sSrc, 0, 4))) { $oElement->setAttribute('data-x-src-cid', \substr($sSrc, 4)); $aFoundCIDs[] = \substr($sSrc, 4); } else { if (\preg_match('/http[s]?:\/\//i', $sSrc)) { if ($bDoNotReplaceExternalUrl) { $oElement->setAttribute('src', $sSrc); } else { $oElement->setAttribute('data-x-src', $sSrc); } $bHasExternals = true; } else if ('data:image/' === \strtolower(\substr(\trim($sSrc), 0, 11))) { $oElement->setAttribute('src', $sSrc); } else { $oElement->setAttribute('data-x-broken-src', $sSrc); } } } $sBackground = $oElement->hasAttribute('background') ? \trim($oElement->getAttribute('background')) : ''; $sBackgroundColor = $oElement->hasAttribute('bgcolor') ? \trim($oElement->getAttribute('bgcolor')) : ''; if (!empty($sBackground) || !empty($sBackgroundColor)) { $aStyles = array(); $sStyles = $oElement->hasAttribute('style') ? 
$oElement->getAttribute('style') : ''; if (!empty($sBackground)) { $aStyles[] = 'background-image: url(\''.$sBackground.'\')'; $oElement->removeAttribute('background'); } if (!empty($sBackgroundColor)) { $aStyles[] = 'background-color: '.$sBackgroundColor; $oElement->removeAttribute('bgcolor'); } $oElement->setAttribute('style', (empty($sStyles) ? '' : $sStyles.'; ').\implode('; ', $aStyles)); } if ($oElement->hasAttribute('style')) { $oElement->setAttribute('style', \MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals, $aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls, $bDoNotReplaceExternalUrl)); } } $sResult = $oDom->saveHTML(); } unset($oDom); $sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult); $sHtmlAttrs = $sBodyAttrs = ''; $sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, $sHtmlAttrs, $sBodyAttrs); $sResult = ''; $aNextText[] = \substr(\ltrim($sTextLine), 4); } else if (!$bStart && $bIn) { $bIn = false; $aNextText[] = ''; $aNextText[] = $sTextLine; } else if ($bStart && $bIn) { $aNextText[] = \substr(\ltrim($sTextLine), 4); } else { $aNextText[] = $sTextLine; } } if ($bIn) { $bIn = false; $aNextText[] = ''; } $aText = $aNextText; } while ($bDo); $sText = \join("\n", $aText); unset($aText); $sText = \preg_replace('/[\n][ ]+/', "\n", $sText); // $sText = \preg_replace('/[\s]+([\s])/', '\\1', $sText); $sText = \preg_replace('/
[\s]+/i', '', $sText); $sText = \preg_replace('/[\s]+<\/blockquote>/i', '', $sText); $sText = \preg_replace('/<\/blockquote>([\n]{0,2})/i', '\\1', $sText); $sText = \preg_replace('/[\n]{3,}/', "\n\n", $sText); $sText = \strtr($sText, array( "\n" => "
", "\t" => ' ', ' ' => ' ' )); return $sText; } /** * @param string $sText * * @return string */ public static function ConvertHtmlToPlain($sText) { $sText = trim(stripslashes($sText)); $sText = preg_replace('/[\s]+/', ' ', $sText); $sText = preg_replace(array( "/\r/", "/[\n\t]+/", '/