From 8a4a6da287e6efdb2be3b7e69113b90b82598713 Mon Sep 17 00:00:00 2001 From: Jean-Michel Picod Date: Fri, 20 Sep 2024 17:32:18 +0200 Subject: [PATCH] Add missing file which was gitingore'd --- client/deps/liblua/lutf8lib.c | 291 ++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 client/deps/liblua/lutf8lib.c diff --git a/client/deps/liblua/lutf8lib.c b/client/deps/liblua/lutf8lib.c new file mode 100644 index 000000000..3a5b9bc38 --- /dev/null +++ b/client/deps/liblua/lutf8lib.c @@ -0,0 +1,291 @@ +/* +** $Id: lutf8lib.c $ +** Standard library for UTF-8 manipulation +** See Copyright Notice in lua.h +*/ + +#define lutf8lib_c +#define LUA_LIB + +#include "lprefix.h" + + +#include +#include +#include +#include + +#include "lua.h" + +#include "lauxlib.h" +#include "lualib.h" + + +#define MAXUNICODE 0x10FFFFu + +#define MAXUTF 0x7FFFFFFFu + + +#define MSGInvalid "invalid UTF-8 code" + +/* +** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits. +*/ +#if (UINT_MAX >> 30) >= 1 +typedef unsigned int utfint; +#else +typedef unsigned long utfint; +#endif + + +#define iscont(c) (((c) & 0xC0) == 0x80) +#define iscontp(p) iscont(*(p)) + + +/* from strlib */ +/* translate a relative string position: negative means back from end */ +static lua_Integer u_posrelat (lua_Integer pos, size_t len) { + if (pos >= 0) return pos; + else if (0u - (size_t)pos > len) return 0; + else return (lua_Integer)len + pos + 1; +} + + +/* +** Decode one UTF-8 sequence, returning NULL if byte sequence is +** invalid. The array 'limits' stores the minimum value for each +** sequence length, to check for overlong representations. Its first +** entry forces an error for non-ascii bytes with no continuation +** bytes (count == 0). +*/ +static const char *utf8_decode (const char *s, utfint *val, int strict) { + static const utfint limits[] = + {~(utfint)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u}; + unsigned int c = (unsigned char)s[0]; + utfint res = 0; /* final result */ + if (c < 0x80) /* ascii? */ + res = c; + else { + int count = 0; /* to count number of continuation bytes */ + for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */ + unsigned int cc = (unsigned char)s[++count]; /* read next byte */ + if (!iscont(cc)) /* not a continuation byte? */ + return NULL; /* invalid byte sequence */ + res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ + } + res |= ((utfint)(c & 0x7F) << (count * 5)); /* add first byte */ + if (count > 5 || res > MAXUTF || res < limits[count]) + return NULL; /* invalid byte sequence */ + s += count; /* skip continuation bytes read */ + } + if (strict) { + /* check for invalid code points; too large or surrogates */ + if (res > MAXUNICODE || (0xD800u <= res && res <= 0xDFFFu)) + return NULL; + } + if (val) *val = res; + return s + 1; /* +1 to include first byte */ +} + + +/* +** utf8len(s [, i [, j [, lax]]]) --> number of characters that +** start in the range [i,j], or nil + current position if 's' is not +** well formed in that interval +*/ +static int utflen (lua_State *L) { + lua_Integer n = 0; /* counter for the number of characters */ + size_t len; /* string length in bytes */ + const char *s = luaL_checklstring(L, 1, &len); + lua_Integer posi = u_posrelat(luaL_optinteger(L, 2, 1), len); + lua_Integer posj = u_posrelat(luaL_optinteger(L, 3, -1), len); + int lax = lua_toboolean(L, 4); + luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 2, + "initial position out of bounds"); + luaL_argcheck(L, --posj < (lua_Integer)len, 3, + "final position out of bounds"); + while (posi <= posj) { + const char *s1 = utf8_decode(s + posi, NULL, !lax); + if (s1 == NULL) { /* conversion error? */ + luaL_pushfail(L); /* return fail ... */ + lua_pushinteger(L, posi + 1); /* ... and current position */ + return 2; + } + posi = s1 - s; + n++; + } + lua_pushinteger(L, n); + return 1; +} + + +/* +** codepoint(s, [i, [j [, lax]]]) -> returns codepoints for all +** characters that start in the range [i,j] +*/ +static int codepoint (lua_State *L) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + lua_Integer posi = u_posrelat(luaL_optinteger(L, 2, 1), len); + lua_Integer pose = u_posrelat(luaL_optinteger(L, 3, posi), len); + int lax = lua_toboolean(L, 4); + int n; + const char *se; + luaL_argcheck(L, posi >= 1, 2, "out of bounds"); + luaL_argcheck(L, pose <= (lua_Integer)len, 3, "out of bounds"); + if (posi > pose) return 0; /* empty interval; return no values */ + if (pose - posi >= INT_MAX) /* (lua_Integer -> int) overflow? */ + return luaL_error(L, "string slice too long"); + n = (int)(pose - posi) + 1; /* upper bound for number of returns */ + luaL_checkstack(L, n, "string slice too long"); + n = 0; /* count the number of returns */ + se = s + pose; /* string end */ + for (s += posi - 1; s < se;) { + utfint code; + s = utf8_decode(s, &code, !lax); + if (s == NULL) + return luaL_error(L, MSGInvalid); + lua_pushinteger(L, code); + n++; + } + return n; +} + + +static void pushutfchar (lua_State *L, int arg) { + lua_Unsigned code = (lua_Unsigned)luaL_checkinteger(L, arg); + luaL_argcheck(L, code <= MAXUTF, arg, "value out of range"); + lua_pushfstring(L, "%U", (long)code); +} + + +/* +** utfchar(n1, n2, ...) -> char(n1)..char(n2)... +*/ +static int utfchar (lua_State *L) { + int n = lua_gettop(L); /* number of arguments */ + if (n == 1) /* optimize common case of single char */ + pushutfchar(L, 1); + else { + int i; + luaL_Buffer b; + luaL_buffinit(L, &b); + for (i = 1; i <= n; i++) { + pushutfchar(L, i); + luaL_addvalue(&b); + } + luaL_pushresult(&b); + } + return 1; +} + + +/* +** offset(s, n, [i]) -> index where n-th character counting from +** position 'i' starts; 0 means character at 'i'. +*/ +static int byteoffset (lua_State *L) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + lua_Integer n = luaL_checkinteger(L, 2); + lua_Integer posi = (n >= 0) ? 1 : len + 1; + posi = u_posrelat(luaL_optinteger(L, 3, posi), len); + luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 3, + "position out of bounds"); + if (n == 0) { + /* find beginning of current byte sequence */ + while (posi > 0 && iscontp(s + posi)) posi--; + } + else { + if (iscontp(s + posi)) + return luaL_error(L, "initial position is a continuation byte"); + if (n < 0) { + while (n < 0 && posi > 0) { /* move back */ + do { /* find beginning of previous character */ + posi--; + } while (posi > 0 && iscontp(s + posi)); + n++; + } + } + else { + n--; /* do not move for 1st character */ + while (n > 0 && posi < (lua_Integer)len) { + do { /* find beginning of next character */ + posi++; + } while (iscontp(s + posi)); /* (cannot pass final '\0') */ + n--; + } + } + } + if (n == 0) /* did it find given character? */ + lua_pushinteger(L, posi + 1); + else /* no such character */ + luaL_pushfail(L); + return 1; +} + + +static int iter_aux (lua_State *L, int strict) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + lua_Unsigned n = (lua_Unsigned)lua_tointeger(L, 2); + if (n < len) { + while (iscontp(s + n)) n++; /* go to next character */ + } + if (n >= len) /* (also handles original 'n' being negative) */ + return 0; /* no more codepoints */ + else { + utfint code; + const char *next = utf8_decode(s + n, &code, strict); + if (next == NULL || iscontp(next)) + return luaL_error(L, MSGInvalid); + lua_pushinteger(L, n + 1); + lua_pushinteger(L, code); + return 2; + } +} + + +static int iter_auxstrict (lua_State *L) { + return iter_aux(L, 1); +} + +static int iter_auxlax (lua_State *L) { + return iter_aux(L, 0); +} + + +static int iter_codes (lua_State *L) { + int lax = lua_toboolean(L, 2); + const char *s = luaL_checkstring(L, 1); + luaL_argcheck(L, !iscontp(s), 1, MSGInvalid); + lua_pushcfunction(L, lax ? iter_auxlax : iter_auxstrict); + lua_pushvalue(L, 1); + lua_pushinteger(L, 0); + return 3; +} + + +/* pattern to match a single UTF-8 character */ +#define UTF8PATT "[\0-\x7F\xC2-\xFD][\x80-\xBF]*" + + +static const luaL_Reg funcs[] = { + {"offset", byteoffset}, + {"codepoint", codepoint}, + {"char", utfchar}, + {"len", utflen}, + {"codes", iter_codes}, + /* placeholders */ + {"charpattern", NULL}, + {NULL, NULL} +}; + + +LUAMOD_API int luaopen_utf8 (lua_State *L) { + luaL_newlib(L, funcs); + lua_pushlstring(L, UTF8PATT, sizeof(UTF8PATT)/sizeof(char) - 1); + lua_setfield(L, -2, "charpattern"); + return 1; +} +