bazarr/libs/pyjsparser/pyjsparserdata.py

303 lines
No EOL
11 KiB
Python

# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals
import sys
import unicodedata
from collections import defaultdict
PY3 = sys.version_info >= (3,0)
if PY3:
unichr = chr
xrange = range
unicode = str
token = {
'BooleanLiteral': 1,
'EOF': 2,
'Identifier': 3,
'Keyword': 4,
'NullLiteral': 5,
'NumericLiteral': 6,
'Punctuator': 7,
'StringLiteral': 8,
'RegularExpression': 9,
'Template': 10
}
TokenName = dict((v,k) for k,v in token.items())
FnExprTokens = ['(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
'return', 'case', 'delete', 'throw', 'void',
# assignment operators
'=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=',
'&=', '|=', '^=', ',',
# binary/unary operators
'+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
'|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
'<=', '<', '>', '!=', '!==']
syntax= set(('AssignmentExpression',
'AssignmentPattern',
'ArrayExpression',
'ArrayPattern',
'ArrowFunctionExpression',
'BlockStatement',
'BinaryExpression',
'BreakStatement',
'CallExpression',
'CatchClause',
'ClassBody',
'ClassDeclaration',
'ClassExpression',
'ConditionalExpression',
'ContinueStatement',
'DoWhileStatement',
'DebuggerStatement',
'EmptyStatement',
'ExportAllDeclaration',
'ExportDefaultDeclaration',
'ExportNamedDeclaration',
'ExportSpecifier',
'ExpressionStatement',
'ForStatement',
'ForInStatement',
'FunctionDeclaration',
'FunctionExpression',
'Identifier',
'IfStatement',
'ImportDeclaration',
'ImportDefaultSpecifier',
'ImportNamespaceSpecifier',
'ImportSpecifier',
'Literal',
'LabeledStatement',
'LogicalExpression',
'MemberExpression',
'MethodDefinition',
'NewExpression',
'ObjectExpression',
'ObjectPattern',
'Program',
'Property',
'RestElement',
'ReturnStatement',
'SequenceExpression',
'SpreadElement',
'Super',
'SwitchCase',
'SwitchStatement',
'TaggedTemplateExpression',
'TemplateElement',
'TemplateLiteral',
'ThisExpression',
'ThrowStatement',
'TryStatement',
'UnaryExpression',
'UpdateExpression',
'VariableDeclaration',
'VariableDeclarator',
'WhileStatement',
'WithStatement'))
# Error messages should be identical to V8.
messages = {
'UnexpectedToken': 'Unexpected token %s',
'UnexpectedNumber': 'Unexpected number',
'UnexpectedString': 'Unexpected string',
'UnexpectedIdentifier': 'Unexpected identifier',
'UnexpectedReserved': 'Unexpected reserved word',
'UnexpectedTemplate': 'Unexpected quasi %s',
'UnexpectedEOS': 'Unexpected end of input',
'NewlineAfterThrow': 'Illegal newline after throw',
'InvalidRegExp': 'Invalid regular expression',
'UnterminatedRegExp': 'Invalid regular expression: missing /',
'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
'NoCatchOrFinally': 'Missing catch or finally after try',
'UnknownLabel': 'Undefined label \'%s\'',
'Redeclaration': '%s \'%s\' has already been declared',
'IllegalContinue': 'Illegal continue statement',
'IllegalBreak': 'Illegal break statement',
'IllegalReturn': 'Illegal return statement',
'StrictModeWith': 'Strict mode code may not include a with statement',
'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
'StrictReservedWord': 'Use of future reserved word in strict mode',
'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
'DefaultRestParameter': 'Unexpected token =',
'ObjectPatternAsRestParameter': 'Unexpected token {',
'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
'DuplicateConstructor': 'A class may only have one constructor',
'StaticPrototype': 'Classes may not have static property named prototype',
'MissingFromClause': 'Unexpected token',
'NoAsAfterImportNamespace': 'Unexpected token',
'InvalidModuleSpecifier': 'Unexpected token',
'IllegalImportDeclaration': 'Unexpected token',
'IllegalExportDeclaration': 'Unexpected token'}
PRECEDENCE = {'||':1,
'&&':2,
'|':3,
'^':4,
'&':5,
'==':6,
'!=':6,
'===':6,
'!==':6,
'<':7,
'>':7,
'<=':7,
'>=':7,
'instanceof':7,
'in':7,
'<<':8,
'>>':8,
'>>>':8,
'+':9,
'-':9,
'*':11,
'/':11,
'%':11}
class Token: pass
class Syntax: pass
class Messages: pass
class PlaceHolders:
ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'
for k,v in token.items():
setattr(Token, k, v)
for e in syntax:
setattr(Syntax, e, e)
for k,v in messages.items():
setattr(Messages, k, v)
#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
BOM = u'\uFEFF'
ZWJ = u'\u200D'
ZWNJ = u'\u200C'
TAB = u'\u0009'
VT = u'\u000B'
FF = u'\u000C'
SP = u'\u0020'
NBSP = u'\u00A0'
LF = u'\u000A'
CR = u'\u000D'
LS = u'\u2028'
PS = u'\u2029'
U_CATEGORIES = defaultdict(list)
for c in map(unichr, range(sys.maxunicode + 1)):
U_CATEGORIES[unicodedata.category(c)].append(c)
UNICODE_LETTER = set(U_CATEGORIES['Lu']+U_CATEGORIES['Ll']+
U_CATEGORIES['Lt']+U_CATEGORIES['Lm']+
U_CATEGORIES['Lo']+U_CATEGORIES['Nl'])
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn']+U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])
IDENTIFIER_START = UNICODE_LETTER.union(set(('$','_', '\\'))) # and some fucking unicode escape sequence
IDENTIFIER_PART = IDENTIFIER_START.union(UNICODE_COMBINING_MARK).union(UNICODE_DIGIT)\
.union(UNICODE_CONNECTOR_PUNCTUATION).union(set((ZWJ, ZWNJ)))
WHITE_SPACE = set((0x20, 0x09, 0x0B, 0x0C, 0xA0, 0x1680,
0x180E, 0x2000, 0x2001, 0x2002, 0x2003,
0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
0x2009, 0x200A, 0x202F, 0x205F, 0x3000,
0xFEFF))
LINE_TERMINATORS = set((0x0A, 0x0D, 0x2028, 0x2029))
def isIdentifierStart(ch):
return (ch if isinstance(ch, unicode) else unichr(ch)) in IDENTIFIER_START
def isIdentifierPart(ch):
return (ch if isinstance(ch, unicode) else unichr(ch)) in IDENTIFIER_PART
def isWhiteSpace(ch):
return (ord(ch) if isinstance(ch, unicode) else ch) in WHITE_SPACE
def isLineTerminator(ch):
return (ord(ch) if isinstance(ch, unicode) else ch) in LINE_TERMINATORS
OCTAL = set(('0', '1', '2', '3', '4', '5', '6', '7'))
DEC = set(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'))
HEX = set('0123456789abcdefABCDEF')
HEX_CONV = dict(('0123456789abcdef'[n],n) for n in xrange(16))
for i,e in enumerate('ABCDEF', 10):
HEX_CONV[e] = i
def isDecimalDigit(ch):
return (ch if isinstance(ch, unicode) else unichr(ch)) in DEC
def isHexDigit(ch):
return (ch if isinstance(ch, unicode) else unichr(ch)) in HEX
def isOctalDigit(ch):
return (ch if isinstance(ch, unicode) else unichr(ch)) in OCTAL
def isFutureReservedWord(w):
return w in ('enum', 'export', 'import', 'super')
RESERVED_WORD = set(('implements', 'interface', 'package', 'private', 'protected', 'public', 'static', 'yield', 'let'))
def isStrictModeReservedWord(w):
return w in RESERVED_WORD
def isRestrictedWord(w):
return w in ('eval', 'arguments')
KEYWORDS = set(('if', 'in', 'do', 'var', 'for', 'new', 'try', 'let', 'this', 'else', 'case',
'void', 'with', 'enum', 'while', 'break', 'catch', 'throw', 'const', 'yield',
'class', 'super', 'return', 'typeof', 'delete', 'switch', 'export', 'import',
'default', 'finally', 'extends', 'function', 'continue', 'debugger', 'instanceof', 'pyimport'))
def isKeyword(w):
# 'const' is specialized as Keyword in V8.
# 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
# Some others are from future reserved words.
return w in KEYWORDS
class JsSyntaxError(Exception): pass
if __name__=='__main__':
assert isLineTerminator('\n')
assert isLineTerminator(0x0A)
assert isIdentifierStart('$')
assert isIdentifierStart(100)
assert isWhiteSpace(' ')