encoding = 'UTF-8'; $oDom->formatOutput = false; @$oDom->loadHTML('<'.'?xml version="1.0" encoding="utf-8"?'.'>'. '
'.$sText.''); return $oDom; }

    /**
     * Removes the <html> and <body> wrapper tags (opening and closing) from a
     * markup string and reports the attribute text found on each opening tag.
     *
     * The captured attribute text is everything between the tag name and the
     * closing '>', so it keeps its leading whitespace (for '<body class="x">'
     * the reported value is ' class="x"'). The by-reference arguments are only
     * written when the corresponding tag carries at least one extra character.
     *
     * @param string $sHtml
     * @param string $sHtmlAttrs = ''
     * @param string $sBodyAttrs = ''
     *
     * @return string
     */
    public static function ClearBodyAndHtmlTag($sHtml, &$sHtmlAttrs = '', &$sBodyAttrs = '')
    {
        $aMatch = array();
        if (\preg_match('/<html([^>]+)>/im', $sHtml, $aMatch) && !empty($aMatch[1]))
        {
            $sHtmlAttrs = $aMatch[1];
        }

        $aMatch = array();
        if (\preg_match('/<body([^>]+)>/im', $sHtml, $aMatch) && !empty($aMatch[1]))
        {
            $sBodyAttrs = $aMatch[1];
        }

        // Each pattern must start with the literal tag opener; without it the
        // group's closing parenthesis is unbalanced and PCRE rejects the pattern.
        $sHtml = \preg_replace('/<body([^>]*)>/im', '', $sHtml);
        $sHtml = \preg_replace('/<\/body>/im', '', $sHtml);
        $sHtml = \preg_replace('/<html([^>]*)>/im', '', $sHtml);
        $sHtml = \preg_replace('/<\/html>/im', '', $sHtml);

        return $sHtml;
    }

    /**
     * Textually strips a fixed list of unwanted elements, the doctype
     * declaration and the XML prolog from a markup string.
     *
     * For every listed tag three patterns are applied in order: the element
     * together with its content up to the nearest closing tag, any remaining
     * opening tag, and any remaining closing tag.
     *
     * NOTE(review): tag names are matched as prefixes ('<head' also matches
     * '<header'); this mirrors the existing patterns and is left unchanged.
     *
     * @param string $sHtml
     *
     * @return string
     */
    public static function ClearTags($sHtml)
    {
        $aRemoveTags = array(
            'head', 'link', 'base', 'meta', 'title', 'style', 'script', 'bgsound', 'keygen', 'source',
            'object', 'embed', 'applet', 'mocha', 'iframe', 'frame', 'frameset', 'video', 'audio'
        );

        $aToRemove = array(
            '/<!doctype[^>]*>/msi',
            '/<\?xml [^>]*\?>/msi'
        );

        foreach ($aRemoveTags as $sTag)
        {
            // The patterns use "'" as the regex delimiter, so "/" needs no escaping.
            $aToRemove[] = '\'<'.$sTag.'[^>]*>.*?</[\s]*'.$sTag.'>\'msi';
            $aToRemove[] = '\'<'.$sTag.'[^>]*>\'msi';
            $aToRemove[] = '\'</[\s]*'.$sTag.'[^>]*>\'msi';
        }

        return \preg_replace($aToRemove, '', $sHtml);
    }

    /**
     * Rewrites the "on" prefix of the listed event-handler names wherever
     * they occur in the string (case-insensitively, in markup and text alike).
     *
     * NOTE(review): the first character of the replacement string looks like
     * a non-Latin look-alike of "o" - presumably so the result is no longer
     * recognised as an event attribute. Keep that literal byte-for-byte.
     *
     * @param string $sHtml
     *
     * @return string
     */
    public static function ClearOn($sHtml)
    {
        $aToReplace = array(
            '/on(Blur)/si',
            '/on(Change)/si',
            '/on(Click)/si',
            '/on(DblClick)/si',
            '/on(Error)/si',
            '/on(Focus)/si',
            '/on(FormChange)/si',
            '/on(KeyDown)/si',
            '/on(KeyPress)/si',
            '/on(KeyUp)/si',
            '/on(Load)/si',
            '/on(MouseDown)/si',
            '/on(MouseEnter)/si',
            '/on(MouseLeave)/si',
            '/on(MouseMove)/si',
            '/on(MouseOut)/si',
            '/on(MouseOver)/si',
            '/on(MouseUp)/si',
            '/on(Move)/si',
            '/on(Resize)/si',
            '/on(ResizeEnd)/si',
            '/on(ResizeStart)/si',
            '/on(Scroll)/si',
            '/on(Select)/si',
            '/on(Submit)/si',
            '/on(Unload)/si'
        );

        return \preg_replace($aToReplace, 'оn\\1', $sHtml);
    }

    /**
     *
     * @param string $sStyle
     * @param \DOMElement $oElement
     * @param 
bool $bHasExternals
     * @param array $aFoundCIDs
     * @param array $aContentLocationUrls
     * @param array $aFoundedContentLocationUrls
     * @param bool $bDoNotReplaceExternalUrl = false
     * @param callback|null $fAdditionalExternalFilter = null
     *
     * @return string Kept declarations, each rebuilt as "name: value" and joined with ";".
     *
     * Filters an inline CSS declaration list. "position: fixed" is rewritten to
     * "absolute"; a fixed set of declarations is dropped; url(...) references in
     * background / list-style-image / content values are taken out of the style
     * and recorded on $oElement through data-x-* attributes instead.
     */
    public static function ClearStyle($sStyle, $oElement, &$bHasExternals, &$aFoundCIDs, $aContentLocationUrls, &$aFoundedContentLocationUrls, $bDoNotReplaceExternalUrl = false, $fAdditionalExternalFilter = null)
    {
        // A filter that is set but not callable is treated as absent.
        if ($fAdditionalExternalFilter && !\is_callable($fAdditionalExternalFilter))
        {
            $fAdditionalExternalFilter = null;
        }

        $aKept = array();
        $aUrlMatch = array();

        foreach (\explode(';', \trim($sStyle)) as $sDeclaration)
        {
            $aPair = \explode(':', $sDeclaration, 2);
            $sProp = \trim(\strtolower($aPair[0]));
            $sPropValue = isset($aPair[1]) ? \trim($aPair[1]) : '';

            if ('position' === $sProp && 'fixed' === \strtolower($sPropValue))
            {
                $sPropValue = 'absolute';
            }

            // Declarations without a name or without a value are discarded.
            if (0 === \strlen($sProp) || 0 === \strlen($sPropValue))
            {
                continue;
            }

            $sValueLower = \strtolower($sPropValue);

            // Declarations that are dropped from the output entirely.
            if (\in_array($sProp, array('behavior', 'pointer-events')) ||
                ('cursor' === $sProp && !\in_array($sValueLower, array('none', 'cursor'))) ||
                ('display' === $sProp && 'none' === $sValueLower) ||
                \preg_match('/expression/i', $sPropValue) ||
                ('text-indent' === $sProp && '-' === \substr(\trim($sPropValue), 0, 1)))
            {
                continue;
            }

            $sDeclaration = $sProp.': '.$sPropValue;

            // Only these properties are inspected for url(...) references;
            // everything else is kept as rebuilt above.
            $bMayCarryUrl = \in_array($sProp, array('background-image', 'background', 'list-style-image', 'content'));
            if (!$bMayCarryUrl ||
                !\preg_match('/url[\s]?\(([^)]+)\)/im', $sPropValue, $aUrlMatch) ||
                empty($aUrlMatch[1]))
            {
                $aKept[] = $sDeclaration;
                continue;
            }

            $sFullUrl = \trim($aUrlMatch[0], '"\' ');
            $sUrl = \trim($aUrlMatch[1], '"\' ');

            // The shorthand "background" is reported as "background-image".
            $sTargetProp = 'background' === $sProp ? 'background-image' : $sProp;

            // Whatever remains of the value once the url(...) part is cut out.
            $sRest = \trim(\preg_replace('/[\s]+/', ' ', \str_replace($sFullUrl, '', $sPropValue)));
            $sDeclaration = empty($sRest) ? '' : $sProp.': '.$sRest;

            if ('cid:' === \strtolower(\substr($sUrl, 0, 4)))
            {
                if ($oElement)
                {
                    $sCid = \substr($sUrl, 4);
                    $oElement->setAttribute('data-x-style-cid-name', $sTargetProp);
                    $oElement->setAttribute('data-x-style-cid', $sCid);
                    $aFoundCIDs[] = $sCid;
                }
            }
            else if ($oElement)
            {
                if (\preg_match('/http[s]?:\/\//i', $sUrl))
                {
                    $bHasExternals = true;

                    if (!$bDoNotReplaceExternalUrl)
                    {
                        if (\in_array($sProp, array('background-image', 'list-style-image', 'content')))
                        {
                            $sDeclaration = '';
                        }

                        // Append to any value already stored on the element.
                        $sStored = '';
                        if ($oElement->hasAttribute('data-x-style-url'))
                        {
                            $sStored = \trim($oElement->getAttribute('data-x-style-url'));
                        }

                        if (empty($sStored))
                        {
                            $sStored = '';
                        }
                        else if (';' === \substr($sStored, -1))
                        {
                            $sStored = $sStored.' ';
                        }
                        else
                        {
                            $sStored = $sStored.'; ';
                        }

                        $oElement->setAttribute('data-x-style-url',
                            \trim($sStored.$sTargetProp.': '.$sFullUrl, ' ;'));

                        if ($fAdditionalExternalFilter)
                        {
                            $sFiltered = \call_user_func($fAdditionalExternalFilter, $sUrl);
                            if (0 < \strlen($sFiltered))
                            {
                                $oElement->setAttribute('data-x-additional-style-url',
                                    $sTargetProp.': url('.$sFiltered.')');
                            }
                        }
                    }
                }
                else if ('data:image/' !== \strtolower(\substr(\trim($sUrl), 0, 11)))
                {
                    $oElement->setAttribute('data-x-broken-style-src', $sFullUrl);
                }
            }

            if (!empty($sDeclaration))
            {
                $aKept[] = $sDeclaration;
            }
        }

        return \implode(';', $aKept);
    }

    /**
     * Walks every element of the document and replaces its direct text
     * children with the nodes obtained by running the text through
     * \MailSo\Base\LinkFinder and re-parsing the result.
     *
     * Elements in the skip list below, and direct children of an <a>, are
     * left untouched. NOTE(review): LinkFinder presumably wraps detected
     * URLs in <a> tags - confirm against that class.
     *
     * @param \DOMDocument $oDom
     */
    public static function FindLinksInDOM(&$oDom)
    {
        $aSkippedTags = array('html', 'meta', 'head', 'style', 'script', 'img', 'button', 'input', 'textarea', 'a');

        $oAllElements = $oDom->getElementsByTagName('*');
        foreach ($oAllElements as /* @var $oElement \DOMElement */ $oElement)
        {
            if (\in_array(\strtolower($oElement->tagName), $aSkippedTags))
            {
                continue;
            }

            $sParentTag = '';
            if (isset($oElement->parentNode) && isset($oElement->parentNode->tagName))
            {
                $sParentTag = \strtolower($oElement->parentNode->tagName);
            }

            if ('a' === $sParentTag || !$oElement->childNodes || 0 >= $oElement->childNodes->length)
            {
                continue;
            }

            // Collect the text children (last to first) before the child list is modified.
            $aTextChildren = array();
            for ($iPos = $oElement->childNodes->length - 1; $iPos > -1; $iPos--)
            {
                $oChild = $oElement->childNodes->item($iPos);
                if ($oChild && XML_TEXT_NODE === $oChild->nodeType)
                {
                    $aTextChildren[] = $oChild;
                }
            }

            unset($oChild);

            foreach ($aTextChildren as $oTextNode)
            {
                if (!$oTextNode || 0 >= \strlen($oTextNode->wholeText))
                {
                    continue;
                }

                $sLinked = \MailSo\Base\LinkFinder::NewInstance()
                    ->Text($oTextNode->wholeText)
                    ->UseDefaultWrappers(true)
                    ->CompileText()
                ;

                $oFragmentDom = \MailSo\Base\HtmlUtils::GetDomFromText(''.$sLinked.'');
                if (!$oFragmentDom)
                {
                    continue;
                }

                $oBodies = $oFragmentDom->getElementsByTagName('body');
                $oPieces = ($oBodies && 0 < $oBodies->length) ? $oBodies->item(0)->childNodes : null;

                if ($oPieces && $oPieces->length)
                {
                    $iCount = $oPieces->length;
                    for ($iPos = 0; $iPos < $iCount; $iPos++)
                    {
                        $oPiece = $oPieces->item($iPos);
                        if (!$oPiece)
                        {
                            continue;
                        }

                        if (XML_ELEMENT_NODE === $oPiece->nodeType && 'a' === \strtolower($oPiece->tagName))
                        {
                            // Links are rebuilt in the target document; ":" in the
                            // visible text is swapped for the class-level $KOS value.
                            $oNewNode = $oDom->createElement('a',
                                \str_replace(':', \MailSo\Base\HtmlUtils::$KOS, \htmlspecialchars($oPiece->nodeValue)));

                            $sHref = $oPiece->getAttribute('href');
                            if ($sHref)
                            {
                                $oNewNode->setAttribute('href', $sHref);
                            }
                        }
                        else
                        {
                            $oNewNode = $oDom->importNode($oPiece);
                        }

                        $oElement->insertBefore($oNewNode, $oTextNode);
                    }

                    // The original text node is removed only after its replacements are in place.
                    $oElement->removeChild($oTextNode);
                }

                unset($oBodies, $oPieces, $oFragmentDom, $sLinked);
            }
        }

        unset($oAllElements);
    }

    /**
     * Shortcut for ClearHtml() for callers that do not need the by-reference
     * outputs: throw-away variables are passed in their place.
     *
     * @param string $sHtml
     * @param bool $bDoNotReplaceExternalUrl = false
     * @param bool $bFindLinksInHtml = false
     *
     * @return string
     */
    public static function ClearHtmlSimple($sHtml, $bDoNotReplaceExternalUrl = false, $bFindLinksInHtml = false)
    {
        $bIgnoredExternals = false;
        $aIgnoredCids = array();
        $aNoLocations = array();
        $aIgnoredLocations = array();

        return \MailSo\Base\HtmlUtils::ClearHtml(
            $sHtml,
            $bIgnoredExternals,
            $aIgnoredCids,
            $aNoLocations,
            $aIgnoredLocations,
            $bDoNotReplaceExternalUrl,
            $bFindLinksInHtml
        );
    }

    /**
     * @param string $sHtml
     * @param bool $bHasExternals = false
     * @param array $aFoundCIDs = array()
     * @param array $aContentLocationUrls = array()
     * @param array $aFoundedContentLocationUrls = array()
     * @param bool $bDoNotReplaceExternalUrl = false
     * @param bool $bFindLinksInHtml = false
     * @param callback|null $fAdditionalExternalFilter = null
     *
     * @return string
     */
    public static function ClearHtml($sHtml, &$bHasExternals = false, &$aFoundCIDs = array(), $aContentLocationUrls = array(), &$aFoundedContentLocationUrls = array(), $bDoNotReplaceExternalUrl = false, $bFindLinksInHtml = false, $fAdditionalExternalFilter = null)
    {
        $sResult = '';
        $sHtml = null === $sHtml ? 
'' : (string) $sHtml; $sHtml = \trim($sHtml); if (0 === \strlen($sHtml)) { return ''; } if ($fAdditionalExternalFilter && !\is_callable($fAdditionalExternalFilter)) { $fAdditionalExternalFilter = null; } $bHasExternals = false; $sHtml = \MailSo\Base\HtmlUtils::ClearTags($sHtml); $sHtml = \MailSo\Base\HtmlUtils::ClearOn($sHtml); $sHtmlAttrs = $sBodyAttrs = ''; $sHtml = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sHtml, $sHtmlAttrs, $sBodyAttrs); // Dom Part $oDom = \MailSo\Base\HtmlUtils::GetDomFromText($sHtml, $sHtmlAttrs, $sBodyAttrs); unset($sHtml); if ($oDom) { if ($bFindLinksInHtml) { \MailSo\Base\HtmlUtils::FindLinksInDOM($oDom); } $aNodes = $oDom->getElementsByTagName('*'); foreach ($aNodes as /* @var $oElement \DOMElement */ $oElement) { if (\in_array(\strtolower($oElement->tagName), array('svg', 'head', 'link', 'base', 'meta', 'title', 'style', 'script', 'bgsound', 'keygen', 'source', 'object', 'embed', 'applet', 'mocha', 'iframe', 'frame', 'frameset', 'video', 'audio')) && isset($oElement->parentNode)) { @$oElement->parentNode->removeChild($oElement); } } $aNodes = $oDom->getElementsByTagName('*'); foreach ($aNodes as /* @var $oElement \DOMElement */ $oElement) { $sTagNameLower = \strtolower($oElement->tagName); // convert body attributes to styles if ('body' === $sTagNameLower) { $aAttrs = array( 'text' => '', 'topmargin' => '', 'leftmargin' => '', 'bottommargin' => '', 'rightmargin' => '' ); if (isset($oElement->attributes)) { foreach ($oElement->attributes as $sAttributeName => /* @var $oAttributeNode \DOMNode */ $oAttributeNode) { if ($oAttributeNode && isset($oAttributeNode->nodeValue)) { $sAttributeNameLower = \strtolower($sAttributeName); if (isset($aAttrs[$sAttributeNameLower]) && '' === $aAttrs[$sAttributeNameLower]) { $aAttrs[$sAttributeNameLower] = array($sAttributeName, \trim($oAttributeNode->nodeValue)); } } } } $aStyles = array(); foreach ($aAttrs as $sIndex => $aItem) { if (\is_array($aItem)) { $oElement->removeAttribute($aItem[0]); switch 
($sIndex) { case 'text': $aStyles[] = 'color: '.$aItem[1]; break; case 'topmargin': $aStyles[] = 'margin-top: '.((int) $aItem[1]).'px'; break; case 'leftmargin': $aStyles[] = 'margin-left: '.((int) $aItem[1]).'px'; break; case 'bottommargin': $aStyles[] = 'margin-bottom: '.((int) $aItem[1]).'px'; break; case 'rightmargin': $aStyles[] = 'margin-right: '.((int) $aItem[1]).'px'; break; } } } if (0 < \count($aStyles)) { $sStyles = $oElement->hasAttribute('style') ? $oElement->getAttribute('style') : ''; $oElement->setAttribute('style', (empty($sStyles) ? '' : $sStyles.'; ').\implode('; ', $aStyles)); } } if ('iframe' === $sTagNameLower || 'frame' === $sTagNameLower) { $oElement->setAttribute('src', 'javascript:false'); } if (\in_array($sTagNameLower, array('a', 'form', 'area'))) { $oElement->setAttribute('target', '_blank'); } if (\in_array($sTagNameLower, array('a', 'form', 'area', 'input', 'button', 'textarea'))) { $oElement->setAttribute('tabindex', '-1'); } // if ('blockquote' === $sTagNameLower) // { // $oElement->removeAttribute('style'); // } foreach (array( 'id', 'class', 'contenteditable', 'designmode', 'formaction', 'data-bind', 'xmlns', 'srcset' ) as $sAttr) { @$oElement->removeAttribute($sAttr); } foreach (array( 'load', 'blur', 'error', 'focus', 'formchange', 'change', 'click', 'dblclick', 'keydown', 'keypress', 'keyup', 'mousedown', 'mouseenter', 'mouseleave', 'mousemove', 'mouseout', 'mouseover', 'mouseup', 'move', 'resize', 'resizeend', 'resizestart', 'scroll', 'select', 'submit', 'upload' ) as $sAttr) { @$oElement->removeAttribute('on'.$sAttr); } if ($oElement->hasAttribute('href')) { $sHref = \trim($oElement->getAttribute('href')); if (!\preg_match('/^(http[s]?|ftp|skype|mailto):/i', $sHref)) { $oElement->setAttribute('data-x-broken-href', $sHref); $oElement->setAttribute('href', 'javascript:false'); } else if ('a' === $sTagNameLower) { $oElement->setAttribute('rel', 'external'); } } if ($oElement->hasAttribute('src')) { $sSrc = 
\trim($oElement->getAttribute('src')); $oElement->removeAttribute('src'); if (\in_array($sSrc, $aContentLocationUrls)) { $oElement->setAttribute('data-x-src-location', $sSrc); $aFoundedContentLocationUrls[] = $sSrc; } else if ('cid:' === \strtolower(\substr($sSrc, 0, 4))) { $oElement->setAttribute('data-x-src-cid', \substr($sSrc, 4)); $aFoundCIDs[] = \substr($sSrc, 4); } else { if (\preg_match('/http[s]?:\/\//i', $sSrc)) { if ($bDoNotReplaceExternalUrl) { $oElement->setAttribute('src', $sSrc); } else { $oElement->setAttribute('data-x-src', $sSrc); if ($fAdditionalExternalFilter) { $sCallResult = \call_user_func($fAdditionalExternalFilter, $sSrc); if (0 < \strlen($sCallResult)) { $oElement->setAttribute('data-x-additional-src', $sCallResult); } } } $bHasExternals = true; } else if ('data:image/' === \strtolower(\substr(\trim($sSrc), 0, 11))) { $oElement->setAttribute('src', $sSrc); } else { $oElement->setAttribute('data-x-broken-src', $sSrc); } } } $sBackground = $oElement->hasAttribute('background') ? \trim($oElement->getAttribute('background')) : ''; $sBackgroundColor = $oElement->hasAttribute('bgcolor') ? \trim($oElement->getAttribute('bgcolor')) : ''; if (!empty($sBackground) || !empty($sBackgroundColor)) { $aStyles = array(); $sStyles = $oElement->hasAttribute('style') ? $oElement->getAttribute('style') : ''; if (!empty($sBackground)) { $aStyles[] = 'background-image: url(\''.$sBackground.'\')'; $oElement->removeAttribute('background'); } if (!empty($sBackgroundColor)) { $aStyles[] = 'background-color: '.$sBackgroundColor; $oElement->removeAttribute('bgcolor'); } $oElement->setAttribute('style', (empty($sStyles) ? 
'' : $sStyles.'; ').\implode('; ', $aStyles)); } if ($oElement->hasAttribute('style')) { $oElement->setAttribute('style', \MailSo\Base\HtmlUtils::ClearStyle($oElement->getAttribute('style'), $oElement, $bHasExternals, $aFoundCIDs, $aContentLocationUrls, $aFoundedContentLocationUrls, $bDoNotReplaceExternalUrl, $fAdditionalExternalFilter)); } } $sResult = $oDom->saveHTML(); } unset($oDom); $sResult = \MailSo\Base\HtmlUtils::ClearTags($sResult); $sHtmlAttrs = $sBodyAttrs = ''; $sResult = \MailSo\Base\HtmlUtils::ClearBodyAndHtmlTag($sResult, $sHtmlAttrs, $sBodyAttrs); $sResult = ''; $aNextText[] = \substr(\ltrim($sTextLine), 4); } else if (!$bStart && $bIn) { $bIn = false; $aNextText[] = ''; $aNextText[] = $sTextLine; } else if ($bStart && $bIn) { $aNextText[] = \substr(\ltrim($sTextLine), 4); } else { $aNextText[] = $sTextLine; } } if ($bIn) { $bIn = false; $aNextText[] = ''; } $aText = $aNextText; } while ($bDo); $sText = \join("\n", $aText); unset($aText); $sText = \preg_replace('/[\n][ ]+/', "\n", $sText); // $sText = \preg_replace('/[\s]+([\s])/', '\\1', $sText); $sText = \preg_replace('/
[\s]+/i', '', $sText); $sText = \preg_replace('/[\s]+<\/blockquote>/i', '', $sText); $sText = \preg_replace('/<\/blockquote>([\n]{0,2})/i', '\\1', $sText); $sText = \preg_replace('/[\n]{3,}/', "\n\n", $sText); $sText = \strtr($sText, array( "\n" => "
", "\t" => ' ', ' ' => ' ' )); return $sText; } /** * @param string $sText * * @return string */ public static function ConvertHtmlToPlain($sText) { $sText = trim(stripslashes($sText)); $sText = preg_replace('/[\s]+/', ' ', $sText); $sText = preg_replace(array( "/\r/", "/[\n\t]+/", '/