bazarr/libs/markdown/serializers.py

# markdown/serializers.py
#
# Add x/html serialization to ElementTree
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


from __future__ import absolute_import
from __future__ import unicode_literals
from . import util
# Module-level aliases for the etree names used by the serializer below.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use util.etree.test_comment when the etree object exposes it, otherwise the
# regular Comment factory.
# NOTE(review): test_comment is presumably a hook installed by util for
# detecting comment nodes -- confirm against util.
if hasattr(util.etree, 'test_comment'):  # pragma: no cover
    Comment = util.etree.test_comment
else:  # pragma: no cover
    Comment = util.etree.Comment
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction

# Public API of this module.
__all__ = ['to_html_string', 'to_xhtml_string']

# Lower-case names of the HTML elements that never have content.  The
# serializer writes them as ``<tag />`` in XHTML output and never emits an
# end tag for them.
#
# Every name must be a separate item: adjacent string literals with no comma
# between them are concatenated by Python into a single string.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# Convert to a set for fast membership tests; keep the tuple if the ``set``
# builtin is not available.
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:  # pragma: no cover
    pass

# Preferred prefixes for well-known namespace URIs.  Consulted when a
# namespace URI found in a tree has not been assigned a prefix yet.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # XML Schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}


def _raise_serialization_error(text):  # pragma: no cover
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
        )


def _encode(text, encoding):
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _escape_cdata(text):
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _escape_attrib(text):
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _escape_attrib_html(text):
    # escape attribute value
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _serialize_html(write, elem, qnames, namespaces, format):
    """Serialize *elem*, its subtree and its tail as HTML or XHTML.

    Arguments:
        write: callable invoked with each fragment of output text.
        elem: the element to serialize.
        qnames: mapping from tag and attribute names to their serialized
            form, as returned by ``_namespaces``.
        namespaces: mapping of namespace URI to prefix; when non-empty an
            ``xmlns`` declaration is written on this element for every
            entry.  Children are always serialized with ``None``.
        format: ``"html"`` or ``"xhtml"``.  With ``"html"`` an attribute
            whose value equals its name is written in minimized form;
            with ``"xhtml"`` empty elements are written as ``<tag />``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No serialized name: write only the text and the children.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items = sorted(items)  # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # dict.items() returns a view without a sort() method
                    # on Python 3, so build a sorted list instead of
                    # sorting in place.
                    ns_items = sorted(
                        namespaces.items(), key=lambda x: x[1]
                    )  # sort on prefix
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag.lower() in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                if text:
                    if tag.lower() in ["script", "style"]:
                        # script and style content is written unescaped
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                # empty elements never get an end tag
                if tag.lower() not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))


def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the result.

    Arguments:
        root: root element of the tree; must not be ``None``.
        encoding: when ``None`` the serialized text is returned as is;
            otherwise it is encoded with this encoding via ``_encode``.
        default_namespace: namespace URI passed on to ``_namespaces`` to
            be used as the default (prefix-less) namespace.
        format: ``"html"`` or ``"xhtml"``, passed to ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    if encoding is None:
        return "".join(data)
    else:
        # _encode requires the target encoding as its second argument.
        return _encode("".join(data), encoding)


# --------------------------------------------------------------------
# serialization support

def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Arguments:
        elem: root element of the tree to scan.
        default_namespace: optional namespace URI to register with an
            empty prefix.

    Returns:
        A ``(qnames, namespaces)`` tuple.  ``qnames`` maps each tag and
        attribute name (and the text of each ``QName`` used as attribute
        value or element text) to its serialized ``prefix:local`` form;
        ``namespaces`` maps namespace URIs to prefixes.

    Raises:
        ValueError: if *default_namespace* is given and the tree contains
            a non-qualified name.
        TypeError: (through ``_raise_serialization_error``) for a tag that
            is not a ``QName``, a string, ``None``, ``Comment`` or ``PI``.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # the "xml" prefix is not recorded in namespaces
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
        except TypeError:  # pragma: no cover
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # The "already registered" test is nested so that a QName tag
            # seen a second time is skipped instead of falling through to
            # the error branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces


def to_html_string(element):
    """Return *element* and its subtree serialized in the "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")


def to_xhtml_string(element):
    """Return *element* and its subtree serialized in the "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
Include dependencies and remove requirements.txt 2018-09-17 08:27:00 +08:00			`# markdown/searializers.py`
			`#`
			`# Add x/html serialization to Elementree`
			`# Taken from ElementTree 1.3 preview with slight modifications`
			`#`
			`# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.`
			`#`
			`# fredrik@pythonware.com`
			`# http://www.pythonware.com`
			`#`
			`# --------------------------------------------------------------------`
			`# The ElementTree toolkit is`
			`#`
			`# Copyright (c) 1999-2007 by Fredrik Lundh`
			`#`
			`# By obtaining, using, and/or copying this software and/or its`
			`# associated documentation, you agree that you have read, understood,`
			`# and will comply with the following terms and conditions:`
			`#`
			`# Permission to use, copy, modify, and distribute this software and`
			`# its associated documentation for any purpose and without fee is`
			`# hereby granted, provided that the above copyright notice appears in`
			`# all copies, and that both that copyright notice and this permission`
			`# notice appear in supporting documentation, and that the name of`
			`# Secret Labs AB or the author not be used in advertising or publicity`
			`# pertaining to distribution of the software without specific, written`
			`# prior permission.`
			`#`
			`# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD`
			`# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-`
			`# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR`
			`# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY`
			`# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,`
			`# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS`
			`# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE`
			`# OF THIS SOFTWARE.`
			`# --------------------------------------------------------------------`


			`from __future__ import absolute_import`
			`from __future__ import unicode_literals`
			`from . import util`
			`ElementTree = util.etree.ElementTree`
			`QName = util.etree.QName`
			`if hasattr(util.etree, 'test_comment'): # pragma: no cover`
			`Comment = util.etree.test_comment`
			`else: # pragma: no cover`
			`Comment = util.etree.Comment`
			`PI = util.etree.PI`
			`ProcessingInstruction = util.etree.ProcessingInstruction`

			`__all__ = ['to_html_string', 'to_xhtml_string']`

			`HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",`
			`"img", "input", "isindex", "link", "meta" "param")`

			`try:`
			`HTML_EMPTY = set(HTML_EMPTY)`
			`except NameError: # pragma: no cover`
			`pass`

			`_namespace_map = {`
			`# "well-known" namespace prefixes`
			`"http://www.w3.org/XML/1998/namespace": "xml",`
			`"http://www.w3.org/1999/xhtml": "html",`
			`"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",`
			`"http://schemas.xmlsoap.org/wsdl/": "wsdl",`
			`# xml schema`
			`"http://www.w3.org/2001/XMLSchema": "xs",`
			`"http://www.w3.org/2001/XMLSchema-instance": "xsi",`
			`# dublic core`
			`"http://purl.org/dc/elements/1.1/": "dc",`
			`}`


			`def _raise_serialization_error(text): # pragma: no cover`
			`raise TypeError(`
			`"cannot serialize %r (type %s)" % (text, type(text).__name__)`
			`)`


			`def _encode(text, encoding):`
			`try:`
			`return text.encode(encoding, "xmlcharrefreplace")`
			`except (TypeError, AttributeError): # pragma: no cover`
			`_raise_serialization_error(text)`


			`def _escape_cdata(text):`
			`# escape character data`
			`try:`
			`# it's worth avoiding do-nothing calls for strings that are`
			`# shorter than 500 character, or so. assume that's, by far,`
			`# the most common case in most applications.`
			`if "&" in text:`
			`text = text.replace("&", "&amp;")`
			`if "<" in text:`
			`text = text.replace("<", "&lt;")`
			`if ">" in text:`
			`text = text.replace(">", "&gt;")`
			`return text`
			`except (TypeError, AttributeError): # pragma: no cover`
			`_raise_serialization_error(text)`


			`def _escape_attrib(text):`
			`# escape attribute value`
			`try:`
			`if "&" in text:`
			`text = text.replace("&", "&amp;")`
			`if "<" in text:`
			`text = text.replace("<", "&lt;")`
			`if ">" in text:`
			`text = text.replace(">", "&gt;")`
			`if "\"" in text:`
			`text = text.replace("\"", "&quot;")`
			`if "\n" in text:`
			`text = text.replace("\n", "&#10;")`
			`return text`
			`except (TypeError, AttributeError): # pragma: no cover`
			`_raise_serialization_error(text)`


			`def _escape_attrib_html(text):`
			`# escape attribute value`
			`try:`
			`if "&" in text:`
			`text = text.replace("&", "&amp;")`
			`if "<" in text:`
			`text = text.replace("<", "&lt;")`
			`if ">" in text:`
			`text = text.replace(">", "&gt;")`
			`if "\"" in text:`
			`text = text.replace("\"", "&quot;")`
			`return text`
			`except (TypeError, AttributeError): # pragma: no cover`
			`_raise_serialization_error(text)`


			`def _serialize_html(write, elem, qnames, namespaces, format):`
			`tag = elem.tag`
			`text = elem.text`
			`if tag is Comment:`
			`write("<!--%s-->" % _escape_cdata(text))`
			`elif tag is ProcessingInstruction:`
			`write("<?%s?>" % _escape_cdata(text))`
			`else:`
			`tag = qnames[tag]`
			`if tag is None:`
			`if text:`
			`write(_escape_cdata(text))`
			`for e in elem:`
			`_serialize_html(write, e, qnames, None, format)`
			`else:`
			`write("<" + tag)`
			`items = elem.items()`
			`if items or namespaces:`
			`items = sorted(items) # lexical order`
			`for k, v in items:`
			`if isinstance(k, QName):`
			`k = k.text`
			`if isinstance(v, QName):`
			`v = qnames[v.text]`
			`else:`
			`v = _escape_attrib_html(v)`
			`if qnames[k] == v and format == 'html':`
			`# handle boolean attributes`
			`write(" %s" % v)`
			`else:`
			`write(" %s=\"%s\"" % (qnames[k], v))`
			`if namespaces:`
			`items = namespaces.items()`
			`items.sort(key=lambda x: x[1]) # sort on prefix`
			`for v, k in items:`
			`if k:`
			`k = ":" + k`
			`write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))`
			`if format == "xhtml" and tag.lower() in HTML_EMPTY:`
			`write(" />")`
			`else:`
			`write(">")`
			`if text:`
			`if tag.lower() in ["script", "style"]:`
			`write(text)`
			`else:`
			`write(_escape_cdata(text))`
			`for e in elem:`
			`_serialize_html(write, e, qnames, None, format)`
			`if tag.lower() not in HTML_EMPTY:`
			`write("</" + tag + ">")`
			`if elem.tail:`
			`write(_escape_cdata(elem.tail))`


			`def _write_html(root,`
			`encoding=None,`
			`default_namespace=None,`
			`format="html"):`
			`assert root is not None`
			`data = []`
			`write = data.append`
			`qnames, namespaces = _namespaces(root, default_namespace)`
			`_serialize_html(write, root, qnames, namespaces, format)`
			`if encoding is None:`
			`return "".join(data)`
			`else:`
			`return _encode("".join(data))`


			`# --------------------------------------------------------------------`
			`# serialization support`

			`def _namespaces(elem, default_namespace=None):`
			`# identify namespaces used in this tree`

			`# maps qnames to encoded prefix:local names`
			`qnames = {None: None}`

			`# maps uri:s to prefixes`
			`namespaces = {}`
			`if default_namespace:`
			`namespaces[default_namespace] = ""`

			`def add_qname(qname):`
			`# calculate serialized qname representation`
			`try:`
			`if qname[:1] == "{":`
			`uri, tag = qname[1:].split("}", 1)`
			`prefix = namespaces.get(uri)`
			`if prefix is None:`
			`prefix = _namespace_map.get(uri)`
			`if prefix is None:`
			`prefix = "ns%d" % len(namespaces)`
			`if prefix != "xml":`
			`namespaces[uri] = prefix`
			`if prefix:`
			`qnames[qname] = "%s:%s" % (prefix, tag)`
			`else:`
			`qnames[qname] = tag # default element`
			`else:`
			`if default_namespace:`
			`raise ValueError(`
			`"cannot use non-qualified names with "`
			`"default_namespace option"`
			`)`
			`qnames[qname] = qname`
			`except TypeError: # pragma: no cover`
			`_raise_serialization_error(qname)`

			`# populate qname and namespaces table`
			`try:`
			`iterate = elem.iter`
			`except AttributeError:`
			`iterate = elem.getiterator # cET compatibility`
			`for elem in iterate():`
			`tag = elem.tag`
			`if isinstance(tag, QName) and tag.text not in qnames:`
			`add_qname(tag.text)`
			`elif isinstance(tag, util.string_type):`
			`if tag not in qnames:`
			`add_qname(tag)`
			`elif tag is not None and tag is not Comment and tag is not PI:`
			`_raise_serialization_error(tag)`
			`for key, value in elem.items():`
			`if isinstance(key, QName):`
			`key = key.text`
			`if key not in qnames:`
			`add_qname(key)`
			`if isinstance(value, QName) and value.text not in qnames:`
			`add_qname(value.text)`
			`text = elem.text`
			`if isinstance(text, QName) and text.text not in qnames:`
			`add_qname(text.text)`
			`return qnames, namespaces`


			`def to_html_string(element):`
			`return _write_html(ElementTree(element).getroot(), format="html")`


			`def to_xhtml_string(element):`
			`return _write_html(ElementTree(element).getroot(), format="xhtml")`