mirror of
https://github.com/morpheus65535/bazarr.git
synced 2024-11-13 19:22:47 +08:00
91 lines
2.7 KiB
Python
91 lines
2.7 KiB
Python
import sys
|
|
import unicodedata
|
|
from collections import defaultdict
|
|
|
|
|
|
def is_lval(t):
    """Return True if ``t`` is made only of JS identifier characters.

    The first character must be in IDENTIFIER_START and every following
    character must be in IDENTIFIER_PART. An empty (falsy) ``t`` is never
    an identifier.

    Does not check whether ``t`` is reserved or internal; see
    ``is_valid_lval`` for that.
    """
    if not t:
        return False
    chars = iter(t)
    # Use the next() builtin: the iterator.next() method only exists on
    # Python 2, while next() works on Python 2.6+ and Python 3 alike.
    if next(chars) not in IDENTIFIER_START:
        return False
    # The iterator has already consumed the first character, so only the
    # remaining ones are checked against the "part" set.
    return all(ch in IDENTIFIER_PART for ch in chars)
|
|
|
|
|
|
def is_valid_lval(t):
    """Return True if ``t`` is a usable JS identifier name.

    ``t`` is rejected when it is an internal name (see ``is_internal``),
    when it is not lexically an identifier (see ``is_lval``), or when it
    is one of RESERVED_NAMES (keywords such as var, function, if, ...).
    """
    # Checks run in the same order as before: internal, lexical, reserved.
    if is_internal(t):
        return False
    if not is_lval(t):
        return False
    return t not in RESERVED_NAMES
|
|
|
|
|
|
def is_plval(t):
    """Return True if ``t`` begins with the ``'PyJsLval'`` prefix."""
    lval_prefix = 'PyJsLval'
    return t.startswith(lval_prefix)
|
|
|
|
|
|
def is_marker(t):
    """Return True if ``t`` begins with ``'PyJsMarker'`` or ``'PyJsConstant'``."""
    # str.startswith accepts a tuple and matches if any prefix matches.
    marker_prefixes = ('PyJsMarker', 'PyJsConstant')
    return t.startswith(marker_prefixes)
|
|
|
|
|
|
def is_internal(t):
    """Return True if ``t`` is a name reserved for internal use.

    That is the case when ``is_plval`` or ``is_marker`` accepts it, or when
    it is exactly ``'var'``.
    """
    if is_plval(t) or is_marker(t):
        return True
    # 'var' is a scope var
    return t == 'var'
|
|
|
|
|
|
def is_property_accessor(t):
    """Return True if ``t`` contains a ``'['`` or a ``'.'``."""
    # Checked in the same order as before: bracket first, then dot.
    return any(symbol in t for symbol in ('[', '.'))
|
|
|
|
|
|
def is_reserved(t):
    """Return True if ``t`` is a member of RESERVED_NAMES."""
    reserved = t in RESERVED_NAMES
    return reserved
|
|
|
|
|
|
# How to list every character of a Unicode category, see:
# http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
|
|
# Named single-character constants, grouped by role.

# Byte order mark and the zero-width joiner / non-joiner.
BOM, ZWJ, ZWNJ = u'\uFEFF', u'\u200D', u'\u200C'

# Tab, vertical tab, form feed, space and no-break space.
TAB, VT, FF = u'\u0009', u'\u000B', u'\u000C'
SP, NBSP = u'\u0020', u'\u00A0'

# Line feed, carriage return, line separator and paragraph separator.
LF, CR = u'\u000A', u'\u000D'
LS, PS = u'\u2028', u'\u2029'
|
|
|
|
# Python 2 spells the "code point -> one-character text string" builtin
# ``unichr``; Python 3 removed it and uses ``chr`` for the same job.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# Maps each Unicode general category code (e.g. 'Lu', 'Nd', 'Zs') to the list
# of all characters in that category, in code point order.
U_CATEGORIES = defaultdict(list)  # Thank you Martijn Pieters!
for _code_point in range(sys.maxunicode + 1):
    c = _unichr(_code_point)
    U_CATEGORIES[unicodedata.category(c)].append(c)
|
|
|
|
# Character classes used to recognise identifiers, built from the per-category
# character lists in U_CATEGORIES.

# Letters: upper, lower, title case, modifier, other, plus letter numbers.
UNICODE_LETTER = {
    ch
    for category in ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')
    for ch in U_CATEGORIES[category]
}
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn']) | set(U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

# Characters allowed at the start of an identifier. Unicode escape sequences
# are a further possibility that this set does not cover.
IDENTIFIER_START = UNICODE_LETTER | {'$', '_'}

# Characters allowed after the first one: everything a start may be, plus
# combining marks, digits, connector punctuation and the zero-width joiners.
IDENTIFIER_PART = (
    IDENTIFIER_START
    | UNICODE_COMBINING_MARK
    | UNICODE_DIGIT
    | UNICODE_CONNECTOR_PUNCTUATION
    | {ZWJ, ZWNJ}
)

# All characters in the "Zs" category (kept as the list from U_CATEGORIES).
USP = U_CATEGORIES['Zs']
|
|
# JS keywords, listed alphabetically.
KEYWORD = {
    'break', 'case', 'catch', 'continue',
    'debugger', 'default', 'delete', 'do',
    'else', 'finally', 'for', 'function',
    'if', 'in', 'instanceof', 'new',
    'return', 'switch', 'this', 'throw',
    'try', 'typeof', 'var', 'void',
    'while', 'with',
}

# Words set aside for future use.
FUTURE_RESERVED_WORD = {
    'class', 'const', 'enum', 'export', 'extends', 'import', 'super',
}

# Every name that may not be used as an identifier: the keywords, the future
# reserved words and the three literals.
RESERVED_NAMES = KEYWORD | FUTURE_RESERVED_WORD | {'null', 'false', 'true'}
|
|
|
|
# Line terminator characters, as a set and as a list snapshot of that set.
LINE_TERMINATOR = {LF, CR, LS, PS}
LLINE_TERMINATOR = list(LINE_TERMINATOR)
# Line terminators plus the two-character CR+LF sequence.
LINE_TERMINATOR_SEQUENCE = LINE_TERMINATOR | {CR + LF}

# White space characters: the named ones plus every character in USP.
# Built with .union() because USP is a list, not a set.
WHITE = {TAB, VT, FF, SP, NBSP, BOM}.union(USP)
# Anything that counts as blank: white space or a line terminator.
SPACE = WHITE | LINE_TERMINATOR

# All white space characters followed by all line terminators, as one string.
x = ''.join(WHITE) + ''.join(LINE_TERMINATOR)
|