2019-12-11 04:09:51 +08:00
|
|
|
#include <Python.h>
|
|
|
|
|
|
|
|
static PyObject* markup;
|
|
|
|
|
|
|
|
static int
|
|
|
|
init_constants(void)
|
|
|
|
{
|
|
|
|
PyObject *module;
|
|
|
|
|
|
|
|
/* import markup type so that we can mark the return value */
|
|
|
|
module = PyImport_ImportModule("markupsafe");
|
|
|
|
if (!module)
|
|
|
|
return 0;
|
|
|
|
markup = PyObject_GetAttrString(module, "Markup");
|
|
|
|
Py_DECREF(module);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * GET_DELTA(inp, inp_end, delta)
 *
 * Scan the code units in [inp, inp_end) and add to `delta` the number of
 * additional output units escaping will require: 4 for every '"', '\''
 * or '&' (each becomes a 5-unit sequence) and 3 for every '<' or '>'
 * (each becomes a 4-unit sequence).
 *
 * `inp` is advanced to `inp_end` as a side effect, so it has to be reset
 * before it is used again.  `delta` is only ever incremented; the caller
 * initializes it.
 */
#define GET_DELTA(inp, inp_end, delta) \
	do { \
		for (; (inp) < (inp_end); (inp)++) { \
			if (*(inp) == '"' || *(inp) == '\'' || *(inp) == '&') \
				(delta) += 4; \
			else if (*(inp) == '<' || *(inp) == '>') \
				(delta) += 3; \
		} \
	} while (0)
|
|
|
|
|
|
|
|
/*
 * DO_ESCAPE_FLUSH(inp, outp, run)
 *
 * Helper for DO_ESCAPE: copy the `run` code units that end just before
 * `inp` to `outp`, advance `outp` past them and reset `run` to zero.
 */
#define DO_ESCAPE_FLUSH(inp, outp, run) \
	do { \
		memcpy((outp), (inp) - (run), sizeof(*(outp)) * (run)); \
		(outp) += (run); \
		(run) = 0; \
	} while (0)

/*
 * DO_ESCAPE(inp, inp_end, outp)
 *
 * Copy the code units in [inp, inp_end) to `outp`, replacing
 *     '"'  with  &#34;      '\''  with  &#39;      '&'  with  &amp;
 *     '<'  with  &lt;       '>'   with  &gt;
 *
 * Runs of ordinary code units are not copied one by one; their length is
 * counted and the whole run is moved with a single memcpy when the next
 * special character (or the end of the input) is reached.
 *
 * No bounds checking is done on the output: the destination must already
 * have room for the input length plus the growth computed by GET_DELTA.
 * `inp` ends up equal to `inp_end`.  `outp` is advanced past everything
 * written except the final unescaped run, which is copied without moving
 * `outp`.
 */
#define DO_ESCAPE(inp, inp_end, outp) \
	do { \
		Py_ssize_t run_ = 0; \
		for (; (inp) < (inp_end); (inp)++) { \
			if (*(inp) == '"') { \
				DO_ESCAPE_FLUSH(inp, outp, run_); \
				*(outp)++ = '&'; \
				*(outp)++ = '#'; \
				*(outp)++ = '3'; \
				*(outp)++ = '4'; \
				*(outp)++ = ';'; \
			} \
			else if (*(inp) == '\'') { \
				DO_ESCAPE_FLUSH(inp, outp, run_); \
				*(outp)++ = '&'; \
				*(outp)++ = '#'; \
				*(outp)++ = '3'; \
				*(outp)++ = '9'; \
				*(outp)++ = ';'; \
			} \
			else if (*(inp) == '&') { \
				DO_ESCAPE_FLUSH(inp, outp, run_); \
				*(outp)++ = '&'; \
				*(outp)++ = 'a'; \
				*(outp)++ = 'm'; \
				*(outp)++ = 'p'; \
				*(outp)++ = ';'; \
			} \
			else if (*(inp) == '<') { \
				DO_ESCAPE_FLUSH(inp, outp, run_); \
				*(outp)++ = '&'; \
				*(outp)++ = 'l'; \
				*(outp)++ = 't'; \
				*(outp)++ = ';'; \
			} \
			else if (*(inp) == '>') { \
				DO_ESCAPE_FLUSH(inp, outp, run_); \
				*(outp)++ = '&'; \
				*(outp)++ = 'g'; \
				*(outp)++ = 't'; \
				*(outp)++ = ';'; \
			} \
			else { \
				run_++; \
			} \
		} \
		/* trailing run: copied, but outp is deliberately left as is */ \
		memcpy((outp), (inp) - run_, sizeof(*(outp)) * run_); \
	} while (0)
|
|
|
|
|
|
|
|
static PyObject*
|
|
|
|
escape_unicode_kind1(PyUnicodeObject *in)
|
|
|
|
{
|
|
|
|
Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
|
|
|
|
Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
|
|
|
|
Py_UCS1 *outp;
|
|
|
|
PyObject *out;
|
|
|
|
Py_ssize_t delta = 0;
|
|
|
|
|
|
|
|
GET_DELTA(inp, inp_end, delta);
|
|
|
|
if (!delta) {
|
|
|
|
Py_INCREF(in);
|
|
|
|
return (PyObject*)in;
|
|
|
|
}
|
|
|
|
|
|
|
|
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
|
|
|
|
PyUnicode_IS_ASCII(in) ? 127 : 255);
|
|
|
|
if (!out)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
inp = PyUnicode_1BYTE_DATA(in);
|
|
|
|
outp = PyUnicode_1BYTE_DATA(out);
|
|
|
|
DO_ESCAPE(inp, inp_end, outp);
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject*
|
|
|
|
escape_unicode_kind2(PyUnicodeObject *in)
|
|
|
|
{
|
|
|
|
Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
|
|
|
|
Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
|
|
|
|
Py_UCS2 *outp;
|
|
|
|
PyObject *out;
|
|
|
|
Py_ssize_t delta = 0;
|
|
|
|
|
|
|
|
GET_DELTA(inp, inp_end, delta);
|
|
|
|
if (!delta) {
|
|
|
|
Py_INCREF(in);
|
|
|
|
return (PyObject*)in;
|
|
|
|
}
|
|
|
|
|
|
|
|
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
|
|
|
|
if (!out)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
inp = PyUnicode_2BYTE_DATA(in);
|
|
|
|
outp = PyUnicode_2BYTE_DATA(out);
|
|
|
|
DO_ESCAPE(inp, inp_end, outp);
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static PyObject*
|
|
|
|
escape_unicode_kind4(PyUnicodeObject *in)
|
|
|
|
{
|
|
|
|
Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
|
|
|
|
Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
|
|
|
|
Py_UCS4 *outp;
|
|
|
|
PyObject *out;
|
|
|
|
Py_ssize_t delta = 0;
|
|
|
|
|
|
|
|
GET_DELTA(inp, inp_end, delta);
|
|
|
|
if (!delta) {
|
|
|
|
Py_INCREF(in);
|
|
|
|
return (PyObject*)in;
|
|
|
|
}
|
|
|
|
|
|
|
|
out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
|
|
|
|
if (!out)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
inp = PyUnicode_4BYTE_DATA(in);
|
|
|
|
outp = PyUnicode_4BYTE_DATA(out);
|
|
|
|
DO_ESCAPE(inp, inp_end, outp);
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
static PyObject*
|
|
|
|
escape_unicode(PyUnicodeObject *in)
|
|
|
|
{
|
|
|
|
if (PyUnicode_READY(in))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
switch (PyUnicode_KIND(in)) {
|
|
|
|
case PyUnicode_1BYTE_KIND:
|
|
|
|
return escape_unicode_kind1(in);
|
|
|
|
case PyUnicode_2BYTE_KIND:
|
|
|
|
return escape_unicode_kind2(in);
|
|
|
|
case PyUnicode_4BYTE_KIND:
|
|
|
|
return escape_unicode_kind4(in);
|
|
|
|
}
|
|
|
|
assert(0); /* shouldn't happen */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * escape(text) -> Markup
 *
 * Module-level function (METH_O): `self` is unused, `text` is the single
 * argument.
 *
 *  - Exact ints and floats, bools and None are passed to `markup`
 *    unchanged, since their string form needs no escaping.
 *  - If `text` has an ``__html__`` attribute, it is called with no
 *    arguments and its result is passed to `markup` without escaping.
 *  - Otherwise `text` is converted with PyObject_Str() if it is not
 *    already a unicode object, escaped with escape_unicode(), and the
 *    result is passed to `markup`.
 *
 * Returns a new reference, or NULL with an exception set on failure.
 */
static PyObject*
escape(PyObject *self, PyObject *text)
{
	static PyObject *id_html;
	PyObject *s = NULL, *rv = NULL, *html;

	/* Intern the attribute name once and reuse it on later calls. */
	if (id_html == NULL) {
		id_html = PyUnicode_InternFromString("__html__");
		if (id_html == NULL) {
			return NULL;
		}
	}

	/* we don't have to escape integers, bools or floats */
	if (PyLong_CheckExact(text) ||
	    PyFloat_CheckExact(text) || PyBool_Check(text) ||
	    text == Py_None)
		return PyObject_CallFunctionObjArgs(markup, text, NULL);

	/* if the object has an __html__ method that performs the escaping */
	html = PyObject_GetAttr(text, id_html);
	if (html) {
		s = PyObject_CallObject(html, NULL);
		Py_DECREF(html);
		if (s == NULL) {
			return NULL;
		}
		/* Convert to Markup object */
		rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
		Py_DECREF(s);
		return rv;
	}

	/* otherwise make the object unicode if it isn't, then escape */
	/* NOTE(review): this discards whatever exception the failed lookup
	 * raised, not only AttributeError -- confirm that is intended. */
	PyErr_Clear();
	if (!PyUnicode_Check(text)) {
		PyObject *unicode = PyObject_Str(text);
		if (!unicode)
			return NULL;
		s = escape_unicode((PyUnicodeObject*)unicode);
		Py_DECREF(unicode);
	}
	else
		s = escape_unicode((PyUnicodeObject*)text);

	/* escape_unicode() returns NULL on failure.  Passing that NULL on
	 * would make it act as the argument-list terminator of
	 * PyObject_CallFunctionObjArgs() and then be dereferenced by
	 * Py_DECREF(), so propagate the error instead. */
	if (s == NULL)
		return NULL;

	/* convert the unicode string into a markup object. */
	rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
	Py_DECREF(s);
	return rv;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * escape_silent(text) -> Markup
 *
 * Same as escape(), except that None is not forwarded: for None, `markup`
 * is called with no arguments and that result is returned instead.
 */
static PyObject*
escape_silent(PyObject *self, PyObject *text)
{
	if (text == Py_None) {
		return PyObject_CallFunctionObjArgs(markup, NULL);
	}

	return escape(self, text);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * soft_str(s) -> str
 *
 * Return `s` itself (as a new reference) when it is already a unicode
 * object or an instance of a unicode subclass; otherwise return the
 * result of PyObject_Str(s).
 */
static PyObject*
soft_str(PyObject *self, PyObject *s)
{
	if (PyUnicode_Check(s)) {
		Py_INCREF(s);
		return s;
	}

	return PyObject_Str(s);
}
|
|
|
|
|
|
|
|
|
2022-01-24 12:07:52 +08:00
|
|
|
static PyMethodDef module_methods[] = {
|
|
|
|
{
|
|
|
|
"escape",
|
|
|
|
(PyCFunction)escape,
|
|
|
|
METH_O,
|
|
|
|
"Replace the characters ``&``, ``<``, ``>``, ``'``, and ``\"`` in"
|
|
|
|
" the string with HTML-safe sequences. Use this if you need to display"
|
|
|
|
" text that might contain such characters in HTML.\n\n"
|
|
|
|
"If the object has an ``__html__`` method, it is called and the"
|
|
|
|
" return value is assumed to already be safe for HTML.\n\n"
|
|
|
|
":param s: An object to be converted to a string and escaped.\n"
|
|
|
|
":return: A :class:`Markup` string with the escaped text.\n"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"escape_silent",
|
|
|
|
(PyCFunction)escape_silent,
|
|
|
|
METH_O,
|
|
|
|
"Like :func:`escape` but treats ``None`` as the empty string."
|
|
|
|
" Useful with optional values, as otherwise you get the string"
|
|
|
|
" ``'None'`` when the value is ``None``.\n\n"
|
|
|
|
">>> escape(None)\n"
|
|
|
|
"Markup('None')\n"
|
|
|
|
">>> escape_silent(None)\n"
|
|
|
|
"Markup('')\n"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"soft_str",
|
|
|
|
(PyCFunction)soft_str,
|
|
|
|
METH_O,
|
|
|
|
"Convert an object to a string if it isn't already. This preserves"
|
|
|
|
" a :class:`Markup` string rather than converting it back to a basic"
|
|
|
|
" string, so it will still be marked as safe and won't be escaped"
|
|
|
|
" again.\n\n"
|
|
|
|
">>> value = escape(\"<User 1>\")\n"
|
|
|
|
">>> value\n"
|
|
|
|
"Markup('<User 1>')\n"
|
|
|
|
">>> escape(str(value))\n"
|
|
|
|
"Markup('&lt;User 1&gt;')\n"
|
|
|
|
">>> escape(soft_str(value))\n"
|
|
|
|
"Markup('<User 1>')\n"
|
|
|
|
},
|
|
|
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
|
|
|
};
|
2019-12-11 04:09:51 +08:00
|
|
|
|
|
|
|
static struct PyModuleDef module_definition = {
|
2022-01-24 12:07:52 +08:00
|
|
|
PyModuleDef_HEAD_INIT,
|
2019-12-11 04:09:51 +08:00
|
|
|
"markupsafe._speedups",
|
|
|
|
NULL,
|
|
|
|
-1,
|
|
|
|
module_methods,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
PyMODINIT_FUNC
|
|
|
|
PyInit__speedups(void)
|
|
|
|
{
|
|
|
|
if (!init_constants())
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return PyModule_Create(&module_definition);
|
|
|
|
}
|