# Vendored from: bazarr/libs/pyjsparser/parser.py

# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals
from .pyjsparserdata import *
from .std_nodes import *
from pprint import pprint
import sys
__all__ = [
    'PyJsParser', 'parse', 'ENABLE_JS2PY_ERRORS', 'ENABLE_PYIMPORT',
    'JsSyntaxError'
]
# Characters that keep their backslash escape when a JS regexp literal is
# translated for Python's `re` (see _interpret_regexp).  The original tuple
# listed '{' twice and omitted '}' - almost certainly a typo, since every
# other bracket appears as an open/close pair; fixed to include '}'.
REGEXP_SPECIAL_SINGLE = ('\\', '^', '$', '*', '+', '?', '.', '[', ']', '(',
                         ')', '{', '}', '|', '-')
ENABLE_PYIMPORT = False
ENABLE_JS2PY_ERRORS = False
PY3 = sys.version_info >= (3, 0)
if PY3:
    # Python 2 compatibility aliases.
    basestring = str
    long = int
    xrange = range
    unicode = str
ESPRIMA_VERSION = '2.2.0'
DEBUG = False
# Small naming convention changes
# len -> leng
# id -> d
# type -> typ
# str -> st
true = True
false = False
null = None
class PyJsParser:
""" Usage:
parser = PyJsParser()
parser.parse('var JavaScriptCode = 5.1')
"""
def __init__(self):
    # All scanner/parser state lives on the instance; clean() initialises it.
    self.clean()
def test(self, code):
    """Debug helper: parse *code* and pretty-print the resulting AST dict."""
    pprint(self.parse(code))
def clean(self):
self.strict = None
self.sourceType = None
self.index = 0
self.lineNumber = 1
self.lineStart = 0
self.hasLineTerminator = None
self.lastIndex = None
self.lastLineNumber = None
self.lastLineStart = None
self.startIndex = None
self.startLineNumber = None
self.startLineStart = None
self.scanning = None
self.lookahead = None
self.state = None
self.extra = None
self.isBindingElement = None
self.isAssignmentTarget = None
self.firstCoverInitializedNameError = None
# 7.4 Comments
def skipSingleLineComment(self, offset):
    """Advance self.index past a single-line comment body.

    *offset* is how many characters of the opener ('//', '-->', '<!--')
    were already consumed.  Stops just after the terminating line break
    (a CR LF pair counts as one terminator) or at end of input.
    """
    while self.index < self.length:
        ch = self.source[self.index]
        self.index += 1
        if isLineTerminator(ch):
            # Guard the LF lookahead: the JS original relied on
            # charCodeAt() returning NaN past the end, but indexing a
            # Python string out of range raises IndexError.
            if (ord(ch) == 13 and self.index < self.length
                    and ord(self.source[self.index]) == 10):
                self.index += 1
            self.lineNumber += 1
            self.hasLineTerminator = True
            self.lineStart = self.index
            return
def skipMultiLineComment(self):
    """Advance self.index past a '/* ... */' body (opener consumed).

    Line terminators inside the comment update line bookkeeping.  An
    unterminated comment ends up raising via tolerateUnexpectedToken().
    """
    while self.index < self.length:
        ch = ord(self.source[self.index])
        if isLineTerminator(ch):
            # CR LF counts as a single terminator; bounds-check the
            # lookahead (the JS original tolerated reading past EOF).
            if (ch == 0x0D and self.index + 1 < self.length
                    and ord(self.source[self.index + 1]) == 0x0A):
                self.index += 1
            self.lineNumber += 1
            self.index += 1
            self.hasLineTerminator = True
            self.lineStart = self.index
        elif ch == 0x2A:
            # Block comment ends with '*/'.
            if (self.index + 1 < self.length
                    and ord(self.source[self.index + 1]) == 0x2F):
                self.index += 2
                return
            self.index += 1
        else:
            self.index += 1
    # Ran off the end of the source without seeing '*/'.
    self.tolerateUnexpectedToken()
def skipComment(self):
    """Skip whitespace and comments starting at self.index.

    Handles '//', '/* */' and the HTML-like '<!--' and (line-start only)
    '-->' comment forms.  Sets self.hasLineTerminator when a line break
    was crossed, which drives automatic semicolon insertion.
    """
    self.hasLineTerminator = False
    start = (self.index == 0)  # '-->' only opens a comment at line start
    while self.index < self.length:
        ch = ord(self.source[self.index])
        if isWhiteSpace(ch):
            self.index += 1
        elif isLineTerminator(ch):
            self.hasLineTerminator = True
            self.index += 1
            # NOTE(review): the lookaheads below assume another character
            # exists past self.index; presumably the caller pads the
            # source, otherwise a lone CR or '/' at EOF raises - confirm.
            if (ch == 0x0D and ord(self.source[self.index]) == 0x0A):
                self.index += 1
            self.lineNumber += 1
            self.lineStart = self.index
            start = True
        elif (ch == 0x2F):  # U+002F is '/'
            ch = ord(self.source[self.index + 1])
            if (ch == 0x2F):
                self.index += 2
                self.skipSingleLineComment(2)
                start = True
            elif (ch == 0x2A):  # U+002A is '*'
                self.index += 2
                self.skipMultiLineComment()
            else:
                break
        elif (start and ch == 0x2D):  # U+002D is '-'
            # U+003E is '>'
            if (ord(self.source[self.index + 1]) == 0x2D) and (ord(
                    self.source[self.index + 2]) == 0x3E):
                # '-->' is a single-line comment
                self.index += 3
                self.skipSingleLineComment(3)
            else:
                break
        elif (ch == 0x3C):  # U+003C is '<'
            if self.source[self.index + 1:self.index + 4] == '!--':
                # '<!--' opens a single-line comment
                self.index += 4
                self.skipSingleLineComment(4)
            else:
                break
        else:
            break
def scanHexEscape(self, prefix):
    """Scan a \\xHH or \\uHHHH escape after *prefix* ('x' or 'u').

    Returns the decoded character, or '' when the required number of hex
    digits is not present (self.index stays where scanning stopped).
    """
    digits_needed = 4 if prefix == 'u' else 2
    code = 0
    for _ in xrange(digits_needed):
        if self.index >= self.length or not isHexDigit(
                self.source[self.index]):
            return ''
        code = 16 * code + HEX_CONV[self.source[self.index]]
        self.index += 1
    return unichr(code)
def scanUnicodeCodePointEscape(self):
    """Scan the body of a \\u{...} escape (the '{' is already consumed).

    Returns the decoded character; code points above 0xFFFF are encoded
    as a UTF-16 surrogate pair.
    """
    ch = self.source[self.index]
    code = 0
    # At least one hex digit is required.
    if ch == '}':
        self.throwUnexpectedToken()
    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        if not isHexDigit(ch):
            break
        code = code * 16 + HEX_CONV[ch]
    # The loop must have ended on the closing brace.
    if code > 0x10FFFF or ch != '}':
        self.throwUnexpectedToken()
    # UTF-16 encoding of the code point.
    if (code <= 0xFFFF):
        return unichr(code)
    cu1 = ((code - 0x10000) >> 10) + 0xD800
    cu2 = ((code - 0x10000) & 1023) + 0xDC00
    return unichr(cu1) + unichr(cu2)
def ccode(self, offset=0):
return ord(self.source[self.index + offset])
def log_err_case(self):
    """Debug helper: print the scanner position and surrounding source text.

    No-op unless the module-level DEBUG flag is set.
    """
    if not DEBUG:
        return
    print('INDEX', self.index)
    print(self.source[self.index - 10:self.index + 10])
    print('')
def at(self, loc):
return None if loc >= self.length else self.source[loc]
def substr(self, le, offset=0):
return self.source[self.index + offset:self.index + offset + le]
def getEscapedIdentifier(self):
    """Scan an identifier that may contain \\uXXXX escape sequences.

    Returns the decoded identifier text; self.index ends just past the
    last identifier character consumed.
    """
    d = self.source[self.index]
    ch = ord(d)
    self.index += 1
    # '\u' (U+005C, U+0075) denotes an escaped character at the start.
    if (ch == 0x5C):
        if (ord(self.source[self.index]) != 0x75):
            self.throwUnexpectedToken()
        self.index += 1
        ch = self.scanHexEscape('u')
        # The decoded char must itself be a valid identifier start.
        if not ch or ch == '\\' or not isIdentifierStart(ch[0]):
            self.throwUnexpectedToken()
        d = ch
    while (self.index < self.length):
        ch = self.ccode()
        if not isIdentifierPart(ch):
            break
        self.index += 1
        d += unichr(ch)
        # '\u' (U+005C, U+0075) denotes an escaped character mid-identifier.
        if (ch == 0x5C):
            # Drop the backslash just appended, then decode the escape.
            d = d[0:len(d) - 1]
            if (self.ccode() != 0x75):
                self.throwUnexpectedToken()
            self.index += 1
            ch = self.scanHexEscape('u')
            if (not ch or ch == '\\' or not isIdentifierPart(ch[0])):
                self.throwUnexpectedToken()
            d += ch
    return d
def getIdentifier(self):
    """Scan an identifier whose first character was already validated.

    Falls back to getEscapedIdentifier() (rescanning from the start) as
    soon as a backslash - the opener of a unicode escape - is seen.
    """
    start = self.index
    self.index += 1
    while self.index < self.length:
        code = self.ccode()
        if code == 0x5C:
            # Backslash (U+005C) marks a unicode escape sequence.
            self.index = start
            return self.getEscapedIdentifier()
        if not isIdentifierPart(code):
            break
        self.index += 1
    return self.source[start:self.index]
def scanIdentifier(self):
    """Scan an identifier-like token and classify it.

    Single-character names are always plain identifiers; longer ones may
    turn out to be keywords, 'null', or the boolean literals.
    """
    start = self.index
    # Backslash (U+005C) starts an escaped character.
    if self.ccode() == 0x5C:
        name = self.getEscapedIdentifier()
    else:
        name = self.getIdentifier()
    # There is no keyword or literal with only one character,
    # so a one-character name must be an identifier.
    if len(name) == 1:
        token_type = Token.Identifier
    elif isKeyword(name):
        token_type = Token.Keyword
    elif name == 'null':
        token_type = Token.NullLiteral
    elif name in ('true', 'false'):
        token_type = Token.BooleanLiteral
    else:
        token_type = Token.Identifier
    return {
        'type': token_type,
        'value': name,
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
# 7.7 Punctuators
def scanPunctuator(self):
    """Scan a punctuator token, preferring the longest match.

    Tries single-character fast paths first, then 4-, 3-, 2- and
    1-character operators.  '{' / '}' also maintain state['curlyStack'],
    which scanTemplate() uses to recognise template continuations.
    """
    token = {
        'type': Token.Punctuator,
        'value': '',
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': self.index,
        'end': self.index
    }
    # Check for most common single-character punctuators.
    st = self.source[self.index]
    if st == '{':
        self.state['curlyStack'].append('{')
        self.index += 1
    elif st == '}':
        self.index += 1
        self.state['curlyStack'].pop()
    elif st in ('.', '(', ')', ';', ',', '[', ']', ':', '?', '~'):
        self.index += 1
    else:
        # 4-character punctuator.
        st = self.substr(4)
        if (st == '>>>='):
            self.index += 4
        else:
            # 3-character punctuators.
            st = st[0:3]
            if st in ('===', '!==', '>>>', '<<=', '>>='):
                self.index += 3
            else:
                # 2-character punctuators.
                st = st[0:2]
                if st in ('&&', '||', '==', '!=', '+=', '-=', '*=', '/=',
                          '++', '--', '<<', '>>', '&=', '|=', '^=', '%=',
                          '<=', '>=', '=>'):
                    self.index += 2
                else:
                    # 1-character punctuators.
                    st = self.source[self.index]
                    if st in ('<', '>', '=', '!', '+', '-', '*', '%', '&',
                              '|', '^', '/'):
                        self.index += 1
    # Nothing matched: the character is not a valid punctuator.
    if self.index == token['start']:
        self.throwUnexpectedToken()
    token['end'] = self.index
    token['value'] = st
    return token
# 7.8.3 Numeric Literals
def scanHexLiteral(self, start):
    """Scan the digits of a hex literal; '0x' was consumed, *start* is
    the literal's first index (used for the raw text)."""
    digits = ''
    while self.index < self.length:
        ch = self.source[self.index]
        if not isHexDigit(ch):
            break
        digits += ch
        self.index += 1
    # Bare '0x' with no digits is an error.
    if not digits:
        self.throwUnexpectedToken()
    # The literal must not run straight into an identifier (e.g. 0x1z).
    if isIdentifierStart(self.ccode()):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(digits, 16),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
def scanBinaryLiteral(self, start):
    """Scan the digits of an ES6 binary literal; '0b' was consumed,
    *start* is the literal's first index (used for the raw text)."""
    digits = ''
    while self.index < self.length:
        ch = self.source[self.index]
        if ch not in ('0', '1'):
            break
        digits += ch
        self.index += 1
    if not digits:
        # Saw only the '0b' / '0B' prefix.
        self.throwUnexpectedToken()
    if self.index < self.length:
        ch = self.source[self.index]
        # The literal must not run into an identifier or another digit.
        if isIdentifierStart(ch) or isDecimalDigit(ch):
            self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(digits, 2),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
def scanOctalLiteral(self, prefix, start):
    """Scan an octal literal.

    *prefix* is the character after the leading '0': an octal digit for
    an implicit octal (e.g. 0777) or 'o'/'O' for the explicit ES6 form.
    The caller consumed the '0'; the prefix character is consumed here.
    """
    if isOctalDigit(prefix):
        octal = True
        number = '0' + self.source[self.index]
        self.index += 1
    else:
        octal = False
        self.index += 1
        number = ''
    while (self.index < self.length):
        if (not isOctalDigit(self.source[self.index])):
            break
        number += self.source[self.index]
        self.index += 1
    if (not octal and not number):
        # Only '0o' or '0O' with no digits following.
        self.throwUnexpectedToken()
    # Must not run into an identifier or a non-octal digit.
    if (isIdentifierStart(self.ccode()) or isDecimalDigit(self.ccode())):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': int(number, 8),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
def octalToDecimal(self, ch):
    """Decode an octal string escape whose first digit *ch* is consumed.

    Reads up to two further octal digits from the source (a third digit
    is only permitted when the escape starts with 0-3).  Returns a dict
    {'code': int, 'octal': bool}; 'octal' stays False only for a plain
    '\\0' escape with no digits following.
    """
    # \0 on its own is not an octal escape sequence.
    is_octal = ch != '0'
    code = int(ch, 8)
    if self.index < self.length and isOctalDigit(self.source[self.index]):
        is_octal = True
        code = code * 8 + int(self.source[self.index], 8)
        self.index += 1
        # Three digits are only allowed when the escape starts with 0-3.
        if (ch in '0123' and self.index < self.length
                and isOctalDigit(self.source[self.index])):
            code = code * 8 + int(self.source[self.index], 8)
            self.index += 1
    return {'code': code, 'octal': is_octal}
def isImplicitOctalLiteral(self):
    """Return True when the digits after the leading 0 form an octal.

    Annex B.1.1: 0777 is an implicit octal literal, but 0779 must be
    re-read as a decimal, which a later '8' or '9' digit signals.
    """
    for pos in xrange(self.index + 1, self.length):
        digit = self.source[pos]
        if digit in '89':
            return False
        if not isOctalDigit(digit):
            return True
    return True
def scanNumericLiteral(self):
    """Scan a numeric literal: decimal, float, hex, binary or octal.

    Dispatches to the specialised scanners on a 0x/0b/0o prefix or an
    implicit octal; otherwise accumulates integer part, optional
    fraction and optional exponent and returns the token.

    NOTE(review): several lookups below index self.source without a
    bounds check; presumably the driver pads the source so a literal at
    EOF cannot overrun - confirm against parse().
    """
    ch = self.source[self.index]
    assert isDecimalDigit(ch) or (
        ch == '.'
    ), 'Numeric literal must start with a decimal digit or a decimal point'
    start = self.index
    number = ''
    if ch != '.':
        number = self.source[self.index]
        self.index += 1
        ch = self.source[self.index]
        # Hex number starts with '0x'.
        # Octal number starts with '0'.
        # Octal number in ES6 starts with '0o'.
        # Binary number in ES6 starts with '0b'.
        if (number == '0'):
            if (ch == 'x' or ch == 'X'):
                self.index += 1
                return self.scanHexLiteral(start)
            if (ch == 'b' or ch == 'B'):
                self.index += 1
                return self.scanBinaryLiteral(start)
            if (ch == 'o' or ch == 'O'):
                return self.scanOctalLiteral(ch, start)
            if (isOctalDigit(ch)):
                if (self.isImplicitOctalLiteral()):
                    return self.scanOctalLiteral(ch, start)
        # Remaining digits of the integer part.
        while (isDecimalDigit(self.ccode())):
            number += self.source[self.index]
            self.index += 1
        ch = self.source[self.index]
    # Optional fractional part.
    if (ch == '.'):
        number += self.source[self.index]
        self.index += 1
        while (isDecimalDigit(self.source[self.index])):
            number += self.source[self.index]
            self.index += 1
        ch = self.source[self.index]
    # Optional exponent part, with optional sign; digits are mandatory.
    if (ch == 'e' or ch == 'E'):
        number += self.source[self.index]
        self.index += 1
        ch = self.source[self.index]
        if (ch == '+' or ch == '-'):
            number += self.source[self.index]
            self.index += 1
        if (isDecimalDigit(self.source[self.index])):
            while (isDecimalDigit(self.source[self.index])):
                number += self.source[self.index]
                self.index += 1
        else:
            self.throwUnexpectedToken()
    # A number must not run straight into an identifier (e.g. 3in).
    if (isIdentifierStart(self.source[self.index])):
        self.throwUnexpectedToken()
    return {
        'type': Token.NumericLiteral,
        'value': float(number),
        'raw': self.source[start:self.index],
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
# 7.8.4 String Literals
def _interpret_regexp(self, string, flags):
    '''Perform string escape - for regexp literals.

    Translates the body of a JS regexp literal into a pattern usable by
    Python's `re`: decodes \\uXXXX / \\xHH, keeps regex-significant
    escapes, expands \\s / \\S to explicit character classes, and passes
    everything else through.  NOTE: reuses the instance scanner fields
    (index/length/source/line*), so it must not run mid-parse.
    '''
    self.index = 0
    self.length = len(string)
    self.source = string
    self.lineNumber = 0
    self.lineStart = 0
    octal = False
    st = ''
    inside_square = 0
    while (self.index < self.length):
        # Outside a character class, single-char translations are
        # wrapped in [...] so they stay a one-character match.
        template = '[%s]' if not inside_square else '%s'
        ch = self.source[self.index]
        self.index += 1
        if ch == '\\':
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch == 'u':
                    digs = self.source[self.index:self.index + 4]
                    if len(digs) == 4 and all(isHexDigit(d) for d in digs):
                        st += template % unichr(int(digs, 16))
                        self.index += 4
                    else:
                        st += 'u'
                elif ch == 'x':
                    digs = self.source[self.index:self.index + 2]
                    if len(digs) == 2 and all(isHexDigit(d) for d in digs):
                        st += template % unichr(int(digs, 16))
                        self.index += 2
                    else:
                        st += 'x'
                # special meaning - single char.
                elif ch == '0':
                    st += '\\0'
                elif ch == 'n':
                    st += '\\n'
                elif ch == 'r':
                    st += '\\r'
                elif ch == 't':
                    st += '\\t'
                elif ch == 'f':
                    st += '\\f'
                elif ch == 'v':
                    st += '\\v'
                # Keep the escape on characters that are special in
                # Python regexes so they are matched literally.
                elif ch in REGEXP_SPECIAL_SINGLE:
                    st += '\\' + ch
                # character groups
                elif ch == 'b':
                    st += '\\b'
                elif ch == 'B':
                    st += '\\B'
                elif ch == 'w':
                    st += '\\w'
                elif ch == 'W':
                    st += '\\W'
                elif ch == 'd':
                    st += '\\d'
                elif ch == 'D':
                    st += '\\D'
                elif ch == 's':
                    # JS \s expanded to an explicit whitespace class.
                    st += template % u' \f\n\r\t\v\u00a0\u1680\u180e\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff'
                elif ch == 'S':
                    # Complement of the JS \s class.
                    st += template % u'\u0000-\u0008\u000e-\u001f\u0021-\u009f\u00a1-\u167f\u1681-\u180d\u180f-\u1fff\u200b-\u2027\u202a-\u202e\u2030-\u205e\u2060-\u2fff\u3001-\ufefe\uff00-\uffff'
                else:
                    if isDecimalDigit(ch):
                        # Backreference: keep the whole digit run escaped.
                        num = ch
                        while self.index < self.length and isDecimalDigit(
                                self.source[self.index]):
                            num += self.source[self.index]
                            self.index += 1
                        st += '\\' + num
                    else:
                        st += ch  # DONT ESCAPE!!!
            else:
                # Escaped line terminator: track line bookkeeping only.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        else:
            # Track whether we are inside a [...] character class.
            if ch == '[':
                inside_square = True
            elif ch == ']':
                inside_square = False
            st += ch
    # print string, 'was transformed to', st
    return st
def scanStringLiteral(self):
    """Scan a single- or double-quoted string literal.

    Decodes escape sequences into 'value' and records whether an octal
    escape was seen (needed for strict-mode checks).
    """
    st = ''
    octal = False
    quote = self.source[self.index]
    assert quote == '\'' or quote == '"', 'String literal must starts with a quote'
    start = self.index
    self.index += 1
    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        if (ch == quote):
            # Found the closing quote; empty quote marks termination.
            quote = ''
            break
        elif (ch == '\\'):
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch in 'ux':
                    if (self.source[self.index] == '{'):
                        # ES6 \u{...} code point escape.
                        self.index += 1
                        st += self.scanUnicodeCodePointEscape()
                    else:
                        unescaped = self.scanHexEscape(ch)
                        if (not unescaped):
                            self.throwUnexpectedToken(
                            )  # with throw I don't know whats the difference
                        st += unescaped
                elif ch == 'n':
                    st += '\n'
                elif ch == 'r':
                    st += '\r'
                elif ch == 't':
                    st += '\t'
                elif ch == 'b':
                    st += '\b'
                elif ch == 'f':
                    st += '\f'
                elif ch == 'v':
                    st += '\x0B'
                # elif ch in '89':
                #     self.throwUnexpectedToken() # again with throw....
                else:
                    if isOctalDigit(ch):
                        octToDec = self.octalToDecimal(ch)
                        octal = octToDec.get('octal') or octal
                        st += unichr(octToDec['code'])
                    else:
                        st += ch
            else:
                # Escaped line terminator: line continuation.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        elif isLineTerminator(ch):
            # Unescaped newline inside a string: unterminated literal.
            break
        else:
            st += ch
    if (quote != ''):
        self.throwUnexpectedToken()
    return {
        'type': Token.StringLiteral,
        'value': st,
        'raw': self.source[start:self.index],
        'octal': octal,
        'lineNumber': self.lineNumber,
        # NOTE(review): other scanners report self.lineStart here;
        # startLineStart looks like a copy/paste slip - confirm.
        'lineStart': self.startLineStart,
        'start': start,
        'end': self.index
    }
def scanTemplate(self):
    """Scan a template string piece: head, middle or tail.

    A piece starts at '`' (head) or at the '}' closing a ${...}
    substitution, and ends at the next '${' or the closing '`'.
    state['curlyStack'] tracks open substitutions.
    """
    cooked = ''
    terminated = False
    tail = False
    start = self.index
    head = (self.source[self.index] == '`')
    rawOffset = 2  # trailing chars ('${') excluded from the raw slice
    self.index += 1
    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        if (ch == '`'):
            # Closing backtick: this piece is the template tail.
            rawOffset = 1
            tail = True
            terminated = True
            break
        elif (ch == '$'):
            if (self.source[self.index] == '{'):
                # '${' opens a substitution; stop this piece here.
                self.state['curlyStack'].append('${')
                self.index += 1
                terminated = True
                break
            cooked += ch
        elif (ch == '\\'):
            ch = self.source[self.index]
            self.index += 1
            if (not isLineTerminator(ch)):
                if ch == 'n':
                    cooked += '\n'
                elif ch == 'r':
                    cooked += '\r'
                elif ch == 't':
                    cooked += '\t'
                elif ch in 'ux':
                    if (self.source[self.index] == '{'):
                        self.index += 1
                        cooked += self.scanUnicodeCodePointEscape()
                    else:
                        # A malformed hex escape is kept verbatim.
                        restore = self.index
                        unescaped = self.scanHexEscape(ch)
                        if (unescaped):
                            cooked += unescaped
                        else:
                            self.index = restore
                            cooked += ch
                elif ch == 'b':
                    cooked += '\b'
                elif ch == 'f':
                    cooked += '\f'
                elif ch == 'v':
                    cooked += '\v'
                else:
                    if (ch == '0'):
                        if isDecimalDigit(self.ccode()):
                            # Illegal: \01 \02 and so on
                            self.throwError(Messages.TemplateOctalLiteral)
                        cooked += '\0'
                    elif (isOctalDigit(ch)):
                        # Illegal: \1 \2
                        self.throwError(Messages.TemplateOctalLiteral)
                    else:
                        cooked += ch
            else:
                # Escaped line terminator: line continuation.
                self.lineNumber += 1
                if (ch == '\r' and self.source[self.index] == '\n'):
                    self.index += 1
                self.lineStart = self.index
        elif (isLineTerminator(ch)):
            # Raw newlines are legal in templates; cooked value gets '\n'.
            self.lineNumber += 1
            if (ch == '\r' and self.source[self.index] == '\n'):
                self.index += 1
            self.lineStart = self.index
            cooked += '\n'
        else:
            cooked += ch
    if (not terminated):
        self.throwUnexpectedToken()
    if (not head):
        # This piece closed a '${' substitution.
        self.state['curlyStack'].pop()
    return {
        'type': Token.Template,
        'value': {
            'cooked': cooked,
            'raw': self.source[start + 1:self.index - rawOffset]
        },
        'head': head,
        'tail': tail,
        'lineNumber': self.lineNumber,
        'lineStart': self.lineStart,
        'start': start,
        'end': self.index
    }
def testRegExp(self, pattern, flags):
# todo: you should return python regexp object
return (pattern, flags)
def scanRegExpBody(self):
    """Scan the /pattern/ part of a regexp literal.

    Tracks [...] character classes (a '/' inside one does not end the
    literal) and rejects line terminators anywhere in the body.
    Returns {'value': body-without-slashes, 'literal': with-slashes}.
    """
    ch = self.source[self.index]
    assert ch == '/', 'Regular expression literal must start with a slash'
    st = ch
    self.index += 1
    classMarker = False  # inside a [...] character class
    terminated = False
    while (self.index < self.length):
        ch = self.source[self.index]
        self.index += 1
        st += ch
        if (ch == '\\'):
            ch = self.source[self.index]
            self.index += 1
            # ECMA-262 7.8.5: no line terminator after a backslash.
            if (isLineTerminator(ch)):
                self.throwUnexpectedToken(None,
                                          Messages.UnterminatedRegExp)
            st += ch
        elif (isLineTerminator(ch)):
            self.throwUnexpectedToken(None, Messages.UnterminatedRegExp)
        elif (classMarker):
            if (ch == ']'):
                classMarker = False
        else:
            if (ch == '/'):
                terminated = True
                break
            elif (ch == '['):
                classMarker = True
    if (not terminated):
        self.throwUnexpectedToken(None, Messages.UnterminatedRegExp)
    # Exclude leading and trailing slash.
    body = st[1:-1]
    return {'value': body, 'literal': st}
def scanRegExpFlags(self):
    """Scan the flags after a regexp literal's closing slash.

    Returns {'value': decoded flags, 'literal': raw flag text}.  A
    \\u-escaped character in the flags is decoded but still reported
    via tolerateUnexpectedToken (which raises in this port).
    """
    st = ''
    flags = ''
    while (self.index < self.length):
        ch = self.source[self.index]
        if (not isIdentifierPart(ch)):
            break
        self.index += 1
        if (ch == '\\' and self.index < self.length):
            ch = self.source[self.index]
            if (ch == 'u'):
                self.index += 1
                restore = self.index
                ch = self.scanHexEscape('u')
                if (ch):
                    flags += ch
                    st += '\\u'
                    # Copy the raw escape digits into the literal text.
                    while restore < self.index:
                        st += self.source[restore]
                        restore += 1
                else:
                    self.index = restore
                    flags += 'u'
                    st += '\\u'
                self.tolerateUnexpectedToken()
            else:
                st += '\\'
                self.tolerateUnexpectedToken()
        else:
            flags += ch
            st += ch
    return {'value': flags, 'literal': st}
def scanRegExp(self):
    """Scan a complete regular expression literal (body plus flags).

    Invalidates the current lookahead, skips leading trivia, and
    returns the literal's token-like dict.
    """
    self.scanning = True
    self.lookahead = None
    self.skipComment()
    start = self.index
    body = self.scanRegExpBody()
    flags = self.scanRegExpFlags()
    value = self.testRegExp(body['value'], flags['value'])
    # The original assigned a dead local ('scanning = False'); the
    # intent - mirroring lex()/peek() - is to clear the instance flag.
    self.scanning = False
    return {
        'literal': body['literal'] + flags['literal'],
        'value': value,
        'raw': self.source[start:self.index],
        'regex': {
            'pattern': body['value'],
            'flags': flags['value']
        },
        'start': start,
        'end': self.index
    }
def collectRegex(self):
    """Skip leading trivia, then scan a regular expression literal."""
    self.skipComment()
    return self.scanRegExp()
def isIdentifierName(self, token):
return token['type'] in (1, 3, 4, 5)
# def advanceSlash(self): ???
def advance(self):
    """Scan and return the next token starting at self.index."""
    if (self.index >= self.length):
        return {
            'type': Token.EOF,
            'lineNumber': self.lineNumber,
            'lineStart': self.lineStart,
            'start': self.index,
            'end': self.index
        }
    ch = self.ccode()
    if isIdentifierStart(ch):
        token = self.scanIdentifier()
        # In strict mode, reserved words are reported as keywords.
        if (self.strict and isStrictModeReservedWord(token['value'])):
            token['type'] = Token.Keyword
        return token
    # Very common: ( and ) and ;
    if (ch == 0x28 or ch == 0x29 or ch == 0x3B):
        return self.scanPunctuator()
    # String literal starts with single quote (U+0027) or double quote (U+0022).
    if (ch == 0x27 or ch == 0x22):
        return self.scanStringLiteral()
    # Dot (.) U+002E can also start a floating-point number, hence the need
    # to check the next character.
    if (ch == 0x2E):
        if (isDecimalDigit(self.ccode(1))):
            return self.scanNumericLiteral()
        return self.scanPunctuator()
    if (isDecimalDigit(ch)):
        return self.scanNumericLiteral()
    # Slash (/) U+002F can also start a regex.
    # if (extra.tokenize && ch == 0x2F):
    #    return advanceSlash();
    # Template literals start with ` (U+0060) for template head
    # or } (U+007D) for template middle or template tail.
    if (ch == 0x60
            or (ch == 0x7D
                and self.state['curlyStack'][len(self.state['curlyStack'])
                                             - 1] == '${')):
        return self.scanTemplate()
    return self.scanPunctuator()
# def collectToken(self):
# loc = {
# 'start': {
# 'line': self.lineNumber,
# 'column': self.index - self.lineStart}}
#
# token = self.advance()
#
# loc['end'] = {
# 'line': self.lineNumber,
# 'column': self.index - self.lineStart}
# if (token['type'] != Token.EOF):
# value = self.source[token['start']: token['end']]
# entry = {
# 'type': TokenName[token['type']],
# 'value': value,
# 'range': [token['start'], token['end']],
# 'loc': loc}
# if (token.get('regex')):
# entry['regex'] = {
# 'pattern': token['regex']['pattern'],
# 'flags': token['regex']['flags']}
# self.extra['tokens'].append(entry)
# return token;
def lex(self):
    """Consume and return the current lookahead token.

    Records the consumed token's position in last*, the next token's
    start in start*, and advances the lookahead by one token.
    """
    self.scanning = True
    self.lastIndex = self.index
    self.lastLineNumber = self.lineNumber
    self.lastLineStart = self.lineStart
    self.skipComment()
    token = self.lookahead
    self.startIndex = self.index
    self.startLineNumber = self.lineNumber
    self.startLineStart = self.lineStart
    self.lookahead = self.advance()
    self.scanning = False
    return token
def peek(self):
    """Prime self.lookahead with the next token without consuming it."""
    self.scanning = True
    self.skipComment()
    self.lastIndex = self.index
    self.lastLineNumber = self.lineNumber
    self.lastLineStart = self.lineStart
    self.startIndex = self.index
    self.startLineNumber = self.lineNumber
    self.startLineStart = self.lineStart
    self.lookahead = self.advance()
    self.scanning = False
def createError(self, line, pos, description):
    """Build (not raise) the exception for a syntax error.

    When js2py integration is enabled, ENABLE_JS2PY_ERRORS is expected
    to hold a callable that wraps the message in js2py's error type;
    otherwise a plain JsSyntaxError carries it.  (*pos* is currently
    not included in the message.)
    """
    msg = 'Line ' + unicode(line) + ': ' + unicode(description)
    if ENABLE_JS2PY_ERRORS:
        return ENABLE_JS2PY_ERRORS(msg)
    return JsSyntaxError(msg)
# Throw an exception
def throwError(self, messageFormat, *args):
    """Raise a syntax error positioned at the last-consumed token."""
    msg = messageFormat % tuple(unicode(e) for e in args)
    raise self.createError(self.lastLineNumber, self.lastIndex, msg)
def tolerateError(self, messageFormat, *args):
    # No tolerant mode in this port: "tolerated" errors still raise.
    return self.throwError(messageFormat, *args)
# Throw an exception because of the token.
def unexpectedTokenError(self, token=None, message=''):
    """Build (not raise) the error describing an unexpected *token*.

    When *message* is not supplied, a message is chosen from the token
    type; the offending value is substituted in and the best available
    position information is attached.  The default is None instead of
    the original shared mutable dict literal (same falsy behaviour).
    """
    msg = message or Messages.UnexpectedToken
    if (token):
        typ = token['type']
        if (not message):
            if typ == Token.EOF:
                msg = Messages.UnexpectedEOS
            elif (typ == Token.Identifier):
                msg = Messages.UnexpectedIdentifier
            elif (typ == Token.NumericLiteral):
                msg = Messages.UnexpectedNumber
            elif (typ == Token.StringLiteral):
                msg = Messages.UnexpectedString
            elif (typ == Token.Template):
                msg = Messages.UnexpectedTemplate
            else:
                msg = Messages.UnexpectedToken
            if (typ == Token.Keyword):
                if (isFutureReservedWord(token['value'])):
                    msg = Messages.UnexpectedReserved
                elif (self.strict
                      and isStrictModeReservedWord(token['value'])):
                    msg = Messages.StrictReservedWord
        # Template tokens keep their text under value['raw'].
        value = token['value']['raw'] if (
            typ == Token.Template) else token.get('value')
    else:
        value = 'ILLEGAL'
    msg = msg.replace('%s', unicode(value))
    # Prefer the token's own position; fall back to the scanner state.
    return (self.createError(token['lineNumber'], token['start'], msg) if
            (token and token.get('lineNumber')) else self.createError(
                self.lineNumber if self.scanning else self.lastLineNumber,
                self.index if self.scanning else self.lastIndex, msg))
def throwUnexpectedToken(self, token=None, message=''):
    """Raise the error built by unexpectedTokenError().

    Uses a None default rather than the original shared mutable dict
    literal; unexpectedTokenError treats both the same (falsy).
    """
    raise self.unexpectedTokenError(token, message)
def tolerateUnexpectedToken(self, token={}, message=''):
    # No tolerant mode in this port: "tolerating" still raises.
    self.throwUnexpectedToken(token, message)
# Expect the next token to match the specified punctuator.
# If not, an exception will be thrown.
def expect(self, value):
    """Consume the next token, requiring it to be the punctuator *value*."""
    token = self.lex()
    if (token['type'] != Token.Punctuator or token['value'] != value):
        self.throwUnexpectedToken(token)
# /**
# * @name expectCommaSeparator
# * @description Quietly expect a comma when in tolerant mode, otherwise delegates
# * to <code>expect(value)</code>
# * @since 2.0
# */
def expectCommaSeparator(self):
    """Expect a comma separator (no tolerant mode, so just expect(','))."""
    self.expect(',')
# Expect the next token to match the specified keyword.
# If not, an exception will be thrown.
def expectKeyword(self, keyword):
    """Consume the next token, requiring it to be the keyword *keyword*."""
    token = self.lex()
    if (token['type'] != Token.Keyword or token['value'] != keyword):
        self.throwUnexpectedToken(token)
# Return true if the next token matches the specified punctuator.
def match(self, value):
    """Return True when the lookahead token is the punctuator *value*."""
    ahead = self.lookahead
    return ahead['type'] == Token.Punctuator and ahead['value'] == value
# Return true if the next token matches the specified keyword
def matchKeyword(self, keyword):
    """Return True when the lookahead token is the keyword *keyword*."""
    ahead = self.lookahead
    return ahead['type'] == Token.Keyword and ahead['value'] == keyword
# Return true if the next token matches the specified contextual keyword
# (where an identifier is sometimes a keyword depending on the context)
def matchContextualKeyword(self, keyword):
    """Return True when the lookahead is an identifier spelling *keyword*
    (contextual keywords lex as identifiers)."""
    ahead = self.lookahead
    return ahead['type'] == Token.Identifier and ahead['value'] == keyword
# Return true if the next token is an assignment operator
def matchAssign(self):
    """Return True when the lookahead token is an assignment operator."""
    if self.lookahead['type'] != Token.Punctuator:
        return False
    return self.lookahead['value'] in ('=', '*=', '/=', '%=', '+=', '-=',
                                       '<<=', '>>=', '>>>=', '&=', '^=',
                                       '|=')
def consumeSemicolon(self):
    """Consume a statement-terminating semicolon, applying ASI rules."""
    # Catch the very common case first: immediately a semicolon (U+003B).
    if (self.at(self.startIndex) == ';' or self.match(';')):
        self.lex()
        return
    # Automatic semicolon insertion: a crossed line break terminates.
    if (self.hasLineTerminator):
        return
    # TODO: FIXME(ikarienator): this is seemingly an issue in the previous location info convention.
    self.lastIndex = self.startIndex
    self.lastLineNumber = self.startLineNumber
    self.lastLineStart = self.startLineStart
    # '}' and EOF also close a statement without an explicit semicolon.
    if (self.lookahead['type'] != Token.EOF and not self.match('}')):
        self.throwUnexpectedToken(self.lookahead)
# // Cover grammar support.
# //
# // When an assignment expression position starts with an left parenthesis, the determination of the type
# // of the syntax is to be deferred arbitrarily long until the end of the parentheses pair (plus a lookahead)
# // or the first comma. This situation also defers the determination of all the expressions nested in the pair.
# //
# // There are three productions that can be parsed in a parentheses pair that needs to be determined
# // after the outermost pair is closed. They are:
# //
# // 1. AssignmentExpression
# // 2. BindingElements
# // 3. AssignmentTargets
# //
# // In order to avoid exponential backtracking, we use two flags to denote if the production can be
# // binding element or assignment target.
# //
# // The three productions have the relationship:
# //
# // BindingElements <= AssignmentTargets <= AssignmentExpression
# //
# // with a single exception that CoverInitializedName when used directly in an Expression, generates
# // an early error. Therefore, we need the third state, firstCoverInitializedNameError, to track the
# // first usage of CoverInitializedName and report it when we reached the end of the parentheses pair.
# //
# // isolateCoverGrammar function runs the given parser function with a new cover grammar context, and it does not
# // effect the current flags. This means the production the parser parses is only used as an expression. Therefore
# // the CoverInitializedName check is conducted.
# //
# // inheritCoverGrammar function runs the given parse function with a new cover grammar context, and it propagates
# // the flags outside of the parser. This means the production the parser parses is used as a part of a potential
# // pattern. The CoverInitializedName check is deferred.
def isolateCoverGrammar(self, parser):
oldIsBindingElement = self.isBindingElement
oldIsAssignmentTarget = self.isAssignmentTarget
oldFirstCoverInitializedNameError = self.firstCoverInitializedNameError
self.isBindingElement = true
self.isAssignmentTarget = true
self.firstCoverInitializedNameError = null
result = parser()
if (self.firstCoverInitializedNameError != null):
self.throwUnexpectedToken(self.firstCoverInitializedNameError)
self.isBindingElement = oldIsBindingElement
self.isAssignmentTarget = oldIsAssignmentTarget
self.firstCoverInitializedNameError = oldFirstCoverInitializedNameError
return result
def inheritCoverGrammar(self, parser):
oldIsBindingElement = self.isBindingElement
oldIsAssignmentTarget = self.isAssignmentTarget
oldFirstCoverInitializedNameError = self.firstCoverInitializedNameError
self.isBindingElement = true
self.isAssignmentTarget = true
self.firstCoverInitializedNameError = null
result = parser()
self.isBindingElement = self.isBindingElement and oldIsBindingElement
self.isAssignmentTarget = self.isAssignmentTarget and oldIsAssignmentTarget
self.firstCoverInitializedNameError = oldFirstCoverInitializedNameError or self.firstCoverInitializedNameError
return result
def parseArrayPattern(self):
    """Array destructuring is an ES6 feature this ES5.1 parser rejects.

    Everything after the raise is the unreachable remainder of the ES6
    port, kept for reference.
    """
    raise Ecma51NotSupported('ArrayPattern')
    node = Node()
    elements = []
    self.expect('[')
    while (not self.match(']')):
        if (self.match(',')):
            self.lex()
            elements.append(null)
        else:
            if (self.match('...')):
                restNode = Node()
                self.lex()
                rest = self.parseVariableIdentifier()
                elements.append(restNode.finishRestElement(rest))
                break
            else:
                elements.append(self.parsePatternWithDefault())
            if (not self.match(']')):
                self.expect(',')
    self.expect(']')
    return node.finishArrayPattern(elements)
def parsePropertyPattern(self):
    """Parse one property inside an object destructuring pattern.

    Handles shorthand ({x}), shorthand with default ({x = 1}) and the
    full key: pattern form.
    """
    node = Node()
    computed = self.match('[')
    if (self.lookahead['type'] == Token.Identifier):
        key = self.parseVariableIdentifier()
        if (self.match('=')):
            # Shorthand with default: {x = init}.
            self.lex()
            init = self.parseAssignmentExpression()
            return node.finishProperty(
                'init', key, false,
                WrappingNode(key).finishAssignmentPattern(key, init),
                false, false)
        elif (not self.match(':')):
            # Plain shorthand: {x}.
            return node.finishProperty('init', key, false, key, false,
                                       true)
    else:
        key = self.parseObjectPropertyKey()
    # Full form: key : pattern (with optional default).
    self.expect(':')
    init = self.parsePatternWithDefault()
    return node.finishProperty('init', key, computed, init, false, false)
def parseObjectPattern(self):
    """Object destructuring is an ES6 feature this ES5.1 parser rejects.

    Everything after the raise is the unreachable remainder of the ES6
    port, kept for reference.
    """
    raise Ecma51NotSupported('ObjectPattern')
    node = Node()
    properties = []
    self.expect('{')
    while (not self.match('}')):
        properties.append(self.parsePropertyPattern())
        if (not self.match('}')):
            self.expect(',')
    self.lex()
    return node.finishObjectPattern(properties)
def parsePattern(self):
    """Parse a binding pattern: identifier, array or object pattern.

    (Array/object patterns currently raise Ecma51NotSupported.)
    """
    if self.lookahead['type'] == Token.Identifier:
        return self.parseVariableIdentifier()
    if self.match('['):
        return self.parseArrayPattern()
    if self.match('{'):
        return self.parseObjectPattern()
    self.throwUnexpectedToken(self.lookahead)
def parsePatternWithDefault(self):
    """Parse a binding pattern with an optional '= default' initialiser."""
    startToken = self.lookahead
    pattern = self.parsePattern()
    if (self.match('=')):
        self.lex()
        right = self.isolateCoverGrammar(self.parseAssignmentExpression)
        pattern = WrappingNode(startToken).finishAssignmentPattern(
            pattern, right)
    return pattern
# 11.1.4 Array Initialiser
    def parseArrayInitialiser(self):
        """Parse an array literal (11.1.4), e.g. ``[1, , 2, ...x]``.

        Elisions (bare commas) become ``null`` entries; a ``...`` element
        is parsed as a SpreadElement.  Returns an ArrayExpression node.
        """
        elements = []
        node = Node()
        self.expect('[')
        while (not self.match(']')):
            if (self.match(',')):
                # Elision: a hole in the array literal.
                self.lex()
                elements.append(null)
            elif (self.match('...')):
                restSpread = Node()
                self.lex()
                restSpread.finishSpreadElement(
                    self.inheritCoverGrammar(self.parseAssignmentExpression))
                if (not self.match(']')):
                    # A spread not in trailing position cannot be a
                    # destructuring target.
                    self.isAssignmentTarget = self.isBindingElement = false
                    self.expect(',')
                elements.append(restSpread)
            else:
                elements.append(
                    self.inheritCoverGrammar(self.parseAssignmentExpression))
                if (not self.match(']')):
                    self.expect(',')
        self.lex()
        return node.finishArrayExpression(elements)
# 11.1.5 Object Initialiser
    def parsePropertyFunction(self, node, paramInfo):
        """Parse the body of a method/accessor whose parameters are already
        in ``paramInfo`` and finish ``node`` as a FunctionExpression.

        Re-validates restricted parameter names when the body turns out to
        be strict-mode code.
        """
        self.isAssignmentTarget = self.isBindingElement = false
        previousStrict = self.strict
        body = self.isolateCoverGrammar(self.parseFunctionSourceElements)
        if (self.strict and paramInfo['firstRestricted']):
            self.tolerateUnexpectedToken(paramInfo['firstRestricted'],
                                         paramInfo.get('message'))
        if (self.strict and paramInfo.get('stricted')):
            self.tolerateUnexpectedToken(
                paramInfo.get('stricted'), paramInfo.get('message'))
        self.strict = previousStrict
        return node.finishFunctionExpression(null, paramInfo.get('params'),
                                             paramInfo.get('defaults'), body)
def parsePropertyMethodFunction(self):
node = Node()
params = self.parseParams(null)
method = self.parsePropertyFunction(node, params)
return method
    def parseObjectPropertyKey(self):
        """Parse an object property key: a string/number literal, an
        identifier-like token, or a computed ``[expr]`` key.

        Raises on any other token.
        """
        node = Node()
        token = self.lex()
        # // Note: This function is called only from parseObjectProperty(), where
        # // EOF and Punctuator tokens are already filtered out.
        typ = token['type']
        if typ in [Token.StringLiteral, Token.NumericLiteral]:
            if self.strict and token.get('octal'):
                self.tolerateUnexpectedToken(token,
                                             Messages.StrictOctalLiteral)
            return node.finishLiteral(token)
        elif typ in (Token.Identifier, Token.BooleanLiteral, Token.NullLiteral,
                     Token.Keyword):
            # Reserved words and literals are valid (non-computed) keys.
            return node.finishIdentifier(token['value'])
        elif typ == Token.Punctuator:
            if (token['value'] == '['):
                # Computed key: [expression]
                expr = self.isolateCoverGrammar(self.parseAssignmentExpression)
                self.expect(']')
                return expr
        self.throwUnexpectedToken(token)
def lookaheadPropertyName(self):
typ = self.lookahead['type']
if typ in (Token.Identifier, Token.StringLiteral, Token.BooleanLiteral,
Token.NullLiteral, Token.NumericLiteral, Token.Keyword):
return true
if typ == Token.Punctuator:
return self.lookahead['value'] == '['
return false
# // This function is to try to parse a MethodDefinition as defined in 14.3. But in the case of object literals,
# // it might be called at a position where there is in fact a short hand identifier pattern or a data property.
# // This can only be determined after we consumed up to the left parentheses.
# //
# // In order to avoid back tracking, it returns `null` if the position is not a MethodDefinition and the caller
# // is responsible to visit other options.
    def tryParseMethodDefinition(self, token, key, computed, node):
        """Try to parse a MethodDefinition (getter, setter or shorthand
        method) at the current position.

        ``token``/``key``/``computed`` describe the property key already
        consumed by the caller.  Returns the finished Property node, or
        ``null`` when the position is not a method definition (the caller
        then parses it as an ordinary property).
        """
        if (token['type'] == Token.Identifier):
            # check for `get` and `set`;
            if (token['value'] == 'get' and self.lookaheadPropertyName()):
                computed = self.match('[')
                key = self.parseObjectPropertyKey()
                methodNode = Node()
                # A getter takes no parameters: expect '()'.
                self.expect('(')
                self.expect(')')
                value = self.parsePropertyFunction(
                    methodNode, {
                        'params': [],
                        'defaults': [],
                        'stricted': null,
                        'firstRestricted': null,
                        'message': null
                    })
                return node.finishProperty('get', key, computed, value, false,
                                           false)
            elif (token['value'] == 'set' and self.lookaheadPropertyName()):
                computed = self.match('[')
                key = self.parseObjectPropertyKey()
                methodNode = Node()
                self.expect('(')
                options = {
                    'params': [],
                    'defaultCount': 0,
                    'defaults': [],
                    'firstRestricted': null,
                    'paramSet': {}
                }
                if (self.match(')')):
                    # A setter must take exactly one parameter.
                    self.tolerateUnexpectedToken(self.lookahead)
                else:
                    self.parseParam(options)
                    if (options['defaultCount'] == 0):
                        options['defaults'] = []
                self.expect(')')
                value = self.parsePropertyFunction(methodNode, options)
                return node.finishProperty('set', key, computed, value, false,
                                           false)
        if (self.match('(')):
            # ES6 shorthand method: key(...) { ... }
            value = self.parsePropertyMethodFunction()
            return node.finishProperty('init', key, computed, value, true,
                                       false)
        return null
    def checkProto(self, key, computed, hasProto):
        """Duplicate-``__proto__`` check for object literals.

        NOTE: the bare ``return`` below deliberately disables the check in
        this port; the code after it is the retained (unreachable)
        upstream implementation.
        """
        return
        if (computed == false and
            (key['type'] == Syntax.Identifier and key['name'] == '__proto__' or
             key['type'] == Syntax.Literal and key['value'] == '__proto__')):
            if (hasProto['value']):
                self.tolerateError(Messages.DuplicateProtoProperty)
            else:
                hasProto['value'] = true
    def parseObjectProperty(self, hasProto):
        """Parse a single property of an object literal.

        Tries a method definition first; otherwise parses ``key: value``,
        shorthand-with-initializer (cover grammar), or plain shorthand.
        ``hasProto`` is the shared duplicate-__proto__ state dict.
        """
        token = self.lookahead
        node = Node()
        computed = self.match('[')
        key = self.parseObjectPropertyKey()
        maybeMethod = self.tryParseMethodDefinition(token, key, computed, node)
        if (maybeMethod):
            self.checkProto(maybeMethod['key'], maybeMethod['computed'],
                            hasProto)
            return maybeMethod
        # // init property or short hand property.
        self.checkProto(key, computed, hasProto)
        if (self.match(':')):
            self.lex()
            value = self.inheritCoverGrammar(self.parseAssignmentExpression)
            return node.finishProperty('init', key, computed, value, false,
                                       false)
        if (token['type'] == Token.Identifier):
            if (self.match('=')):
                # Shorthand with initializer is only valid when the object
                # literal is later reinterpreted as a pattern; remember the
                # token so an error can be reported otherwise.
                self.firstCoverInitializedNameError = self.lookahead
                self.lex()
                value = self.isolateCoverGrammar(
                    self.parseAssignmentExpression)
                return node.finishProperty(
                    'init', key, computed,
                    WrappingNode(token).finishAssignmentPattern(key, value),
                    false, true)
            return node.finishProperty('init', key, computed, key, false, true)
        self.throwUnexpectedToken(self.lookahead)
def parseObjectInitialiser(self):
properties = []
hasProto = {'value': false}
node = Node()
self.expect('{')
while (not self.match('}')):
properties.append(self.parseObjectProperty(hasProto))
if (not self.match('}')):
self.expectCommaSeparator()
self.expect('}')
return node.finishObjectExpression(properties)
def reinterpretExpressionAsPattern(self, expr):
typ = (expr['type'])
if typ in (Syntax.Identifier, Syntax.MemberExpression,
Syntax.RestElement, Syntax.AssignmentPattern):
pass
elif typ == Syntax.SpreadElement:
expr['type'] = Syntax.RestElement
self.reinterpretExpressionAsPattern(expr.argument)
elif typ == Syntax.ArrayExpression:
expr['type'] = Syntax.ArrayPattern
for i in xrange(len(expr['elements'])):
if (expr['elements'][i] != null):
self.reinterpretExpressionAsPattern(expr['elements'][i])
elif typ == Syntax.ObjectExpression:
expr['type'] = Syntax.ObjectPattern
for i in xrange(len(expr['properties'])):
self.reinterpretExpressionAsPattern(
expr['properties'][i]['value'])
elif Syntax.AssignmentExpression:
raise Ecma51NotSupported('AssignmentPattern')
expr['type'] = Syntax.AssignmentPattern
self.reinterpretExpressionAsPattern(expr['left'])
else:
# // Allow other node type for tolerant parsing.
return
    def parseTemplateElement(self, option):
        """Parse one quasi (raw/cooked chunk) of a template literal.

        ``option['head']`` tells whether a head element is required;
        raises when the lookahead is not the expected template token.
        """
        if (self.lookahead['type'] != Token.Template
                or (option['head'] and not self.lookahead['head'])):
            self.throwUnexpectedToken()
        node = Node()
        token = self.lex()
        return node.finishTemplateElement({
            'raw': token['value']['raw'],
            'cooked': token['value']['cooked']
        }, token['tail'])
    def parseTemplateLiteral(self):
        """Parse a template literal into alternating quasis and embedded
        ``${...}`` expressions, ending at the tail quasi."""
        node = Node()
        quasi = self.parseTemplateElement({'head': true})
        quasis = [quasi]
        expressions = []
        while (not quasi['tail']):
            expressions.append(self.parseExpression())
            quasi = self.parseTemplateElement({
                'head': false
            })
            quasis.append(quasi)
        return node.finishTemplateLiteral(quasis, expressions)
# 11.1.6 The Grouping Operator
    def parseGroupExpression(self):
        """Parse a parenthesised expression (11.1.6).

        The upstream esprima code also recognises arrow-function parameter
        lists here (``()``, ``(...rest)``, ``(a, b) =>``); this port raises
        ``Ecma51NotSupported`` on each arrow path, so the code following
        each ``raise`` is retained but unreachable.
        """
        self.expect('(')
        if (self.match(')')):
            # '()' can only start an arrow function — unsupported.
            raise Ecma51NotSupported('ArrowFunction')
            self.lex()
            if (not self.match('=>')):
                self.expect('=>')
            return {
                'type': PlaceHolders.ArrowParameterPlaceHolder,
                'params': []
            }
        startToken = self.lookahead
        if (self.match('...')):
            expr = self.parseRestElement()
            self.expect(')')
            if (not self.match('=>')):
                self.expect('=>')
            return {
                'type': PlaceHolders.ArrowParameterPlaceHolder,
                'params': [expr]
            }
        self.isBindingElement = true
        expr = self.inheritCoverGrammar(self.parseAssignmentExpression)
        if (self.match(',')):
            # Comma: collect a SequenceExpression.
            self.isAssignmentTarget = false
            expressions = [expr]
            while (self.startIndex < self.length):
                if (not self.match(',')):
                    break
                self.lex()
                if (self.match('...')):
                    # Rest element inside '(...)' implies an arrow list.
                    raise Ecma51NotSupported('ArrowFunction')
                    if (not self.isBindingElement):
                        self.throwUnexpectedToken(self.lookahead)
                    expressions.append(self.parseRestElement())
                    self.expect(')')
                    if (not self.match('=>')):
                        self.expect('=>')
                    self.isBindingElement = false
                    for i in xrange(len(expressions)):
                        self.reinterpretExpressionAsPattern(expressions[i])
                    return {
                        'type': PlaceHolders.ArrowParameterPlaceHolder,
                        'params': expressions
                    }
                expressions.append(
                    self.inheritCoverGrammar(self.parseAssignmentExpression))
            expr = WrappingNode(startToken).finishSequenceExpression(
                expressions)
        self.expect(')')
        if (self.match('=>')):
            # '(...) =>' — arrow function, unsupported.
            raise Ecma51NotSupported('ArrowFunction')
            if (not self.isBindingElement):
                self.throwUnexpectedToken(self.lookahead)
            if (expr['type'] == Syntax.SequenceExpression):
                for i in xrange(len(expr.expressions)):
                    self.reinterpretExpressionAsPattern(expr['expressions'][i])
            else:
                self.reinterpretExpressionAsPattern(expr)
            expr = {
                'type':
                PlaceHolders.ArrowParameterPlaceHolder,
                'params':
                expr['expressions']
                if expr['type'] == Syntax.SequenceExpression else [expr]
            }
            self.isBindingElement = false
        return expr
# 11.1 Primary Expressions
def parsePrimaryExpression(self):
if (self.match('(')):
self.isBindingElement = false
return self.inheritCoverGrammar(self.parseGroupExpression)
if (self.match('[')):
return self.inheritCoverGrammar(self.parseArrayInitialiser)
if (self.match('{')):
return self.inheritCoverGrammar(self.parseObjectInitialiser)
typ = self.lookahead['type']
node = Node()
if (typ == Token.Identifier):
expr = node.finishIdentifier(self.lex()['value'])
elif (typ == Token.StringLiteral or typ == Token.NumericLiteral):
self.isAssignmentTarget = self.isBindingElement = false
if (self.strict and self.lookahead.get('octal')):
self.tolerateUnexpectedToken(self.lookahead,
Messages.StrictOctalLiteral)
expr = node.finishLiteral(self.lex())
elif (typ == Token.Keyword):
self.isAssignmentTarget = self.isBindingElement = false
if (self.matchKeyword('function')):
return self.parseFunctionExpression()
if (self.matchKeyword('this')):
self.lex()
return node.finishThisExpression()
if (self.matchKeyword('class')):
return self.parseClassExpression()
self.throwUnexpectedToken(self.lex())
elif (typ == Token.BooleanLiteral):
isAssignmentTarget = self.isBindingElement = false
token = self.lex()
token['value'] = (token['value'] == 'true')
expr = node.finishLiteral(token)
elif (typ == Token.NullLiteral):
self.isAssignmentTarget = self.isBindingElement = false
token = self.lex()
token['value'] = null
expr = node.finishLiteral(token)
elif (self.match('/') or self.match('/=')):
self.isAssignmentTarget = self.isBindingElement = false
self.index = self.startIndex
token = self.scanRegExp()
# hehe, here you are!
self.lex()
expr = node.finishLiteral(token)
elif (typ == Token.Template):
expr = self.parseTemplateLiteral()
else:
self.throwUnexpectedToken(self.lex())
return expr
# 11.2 Left-Hand-Side Expressions
def parseArguments(self):
args = []
self.expect('(')
if (not self.match(')')):
while (self.startIndex < self.length):
args.append(
self.isolateCoverGrammar(self.parseAssignmentExpression))
if (self.match(')')):
break
self.expectCommaSeparator()
self.expect(')')
return args
def parseNonComputedProperty(self):
node = Node()
token = self.lex()
if (not self.isIdentifierName(token)):
self.throwUnexpectedToken(token)
return node.finishIdentifier(token['value'])
def parseNonComputedMember(self):
self.expect('.')
return self.parseNonComputedProperty()
def parseComputedMember(self):
self.expect('[')
expr = self.isolateCoverGrammar(self.parseExpression)
self.expect(']')
return expr
def parseNewExpression(self):
node = Node()
self.expectKeyword('new')
callee = self.isolateCoverGrammar(self.parseLeftHandSideExpression)
args = self.parseArguments() if self.match('(') else []
self.isAssignmentTarget = self.isBindingElement = false
return node.finishNewExpression(callee, args)
    def parseLeftHandSideExpressionAllowCall(self):
        """Parse a left-hand-side expression where call expressions are
        allowed: a primary/new/super base followed by any chain of ``.x``,
        ``(args)``, ``[expr]`` or tagged template suffixes."""
        previousAllowIn = self.state['allowIn']
        startToken = self.lookahead
        self.state['allowIn'] = true
        if (self.matchKeyword('super') and self.state['inFunctionBody']):
            expr = Node()
            self.lex()
            expr = expr.finishSuper()
            # 'super' must be immediately called or member-accessed.
            if (not self.match('(') and not self.match('.')
                    and not self.match('[')):
                self.throwUnexpectedToken(self.lookahead)
        else:
            expr = self.inheritCoverGrammar(
                self.parseNewExpression if self.matchKeyword('new') else self.
                parsePrimaryExpression)
        while True:
            if (self.match('.')):
                self.isBindingElement = false
                self.isAssignmentTarget = true
                property = self.parseNonComputedMember()
                expr = WrappingNode(startToken).finishMemberExpression(
                    '.', expr, property)
            elif (self.match('(')):
                # A call result is not a valid assignment target.
                self.isBindingElement = false
                self.isAssignmentTarget = false
                args = self.parseArguments()
                expr = WrappingNode(startToken).finishCallExpression(
                    expr, args)
            elif (self.match('[')):
                self.isBindingElement = false
                self.isAssignmentTarget = true
                property = self.parseComputedMember()
                expr = WrappingNode(startToken).finishMemberExpression(
                    '[', expr, property)
            elif (self.lookahead['type'] == Token.Template
                  and self.lookahead['head']):
                quasi = self.parseTemplateLiteral()
                expr = WrappingNode(startToken).finishTaggedTemplateExpression(
                    expr, quasi)
            else:
                break
        self.state['allowIn'] = previousAllowIn
        return expr
def parseLeftHandSideExpression(self):
assert self.state[
'allowIn'], 'callee of new expression always allow in keyword.'
startToken = self.lookahead
if (self.matchKeyword('super') and self.state['inFunctionBody']):
expr = Node()
self.lex()
expr = expr.finishSuper()
if (not self.match('[') and not self.match('.')):
self.throwUnexpectedToken(self.lookahead)
else:
expr = self.inheritCoverGrammar(
self.parseNewExpression if self.matchKeyword('new') else self.
parsePrimaryExpression)
while True:
if (self.match('[')):
self.isBindingElement = false
self.isAssignmentTarget = true
property = self.parseComputedMember()
expr = WrappingNode(startToken).finishMemberExpression(
'[', expr, property)
elif (self.match('.')):
self.isBindingElement = false
self.isAssignmentTarget = true
property = self.parseNonComputedMember()
expr = WrappingNode(startToken).finishMemberExpression(
'.', expr, property)
elif (self.lookahead['type'] == Token.Template
and self.lookahead['head']):
quasi = self.parseTemplateLiteral()
expr = WrappingNode(startToken).finishTaggedTemplateExpression(
expr, quasi)
else:
break
return expr
# 11.3 Postfix Expressions
    def parsePostfixExpression(self):
        """Parse a postfix expression (11.3): an LHS expression optionally
        followed by ``++``/``--`` on the same line."""
        startToken = self.lookahead
        expr = self.inheritCoverGrammar(
            self.parseLeftHandSideExpressionAllowCall)
        # A line terminator before ++/-- forces ASI, so the operator must
        # be on the same line to bind as a postfix.
        if (not self.hasLineTerminator
                and self.lookahead['type'] == Token.Punctuator):
            if (self.match('++') or self.match('--')):
                # 11.3.1, 11.3.2
                if (self.strict and expr.type == Syntax.Identifier
                        and isRestrictedWord(expr.name)):
                    self.tolerateError(Messages.StrictLHSPostfix)
                if (not self.isAssignmentTarget):
                    self.tolerateError(Messages.InvalidLHSInAssignment)
                self.isAssignmentTarget = self.isBindingElement = false
                token = self.lex()
                expr = WrappingNode(startToken).finishPostfixExpression(
                    token['value'], expr)
        return expr
# 11.4 Unary Operators
    def parseUnaryExpression(self):
        """Parse a unary expression (11.4): prefix ``++``/``--``, the
        arithmetic/bitwise/logical unary operators, and the keyword
        operators ``delete``/``void``/``typeof``; otherwise falls through
        to a postfix expression."""
        if (self.lookahead['type'] != Token.Punctuator
                and self.lookahead['type'] != Token.Keyword):
            expr = self.parsePostfixExpression()
        elif (self.match('++') or self.match('--')):
            startToken = self.lookahead
            token = self.lex()
            expr = self.inheritCoverGrammar(self.parseUnaryExpression)
            # 11.4.4, 11.4.5
            if (self.strict and expr.type == Syntax.Identifier
                    and isRestrictedWord(expr.name)):
                self.tolerateError(Messages.StrictLHSPrefix)
            if (not self.isAssignmentTarget):
                self.tolerateError(Messages.InvalidLHSInAssignment)
            expr = WrappingNode(startToken).finishUnaryExpression(
                token['value'], expr)
            self.isAssignmentTarget = self.isBindingElement = false
        elif (self.match('+') or self.match('-') or self.match('~')
              or self.match('!')):
            startToken = self.lookahead
            token = self.lex()
            expr = self.inheritCoverGrammar(self.parseUnaryExpression)
            expr = WrappingNode(startToken).finishUnaryExpression(
                token['value'], expr)
            self.isAssignmentTarget = self.isBindingElement = false
        elif (self.matchKeyword('delete') or self.matchKeyword('void')
              or self.matchKeyword('typeof')):
            startToken = self.lookahead
            token = self.lex()
            expr = self.inheritCoverGrammar(self.parseUnaryExpression)
            expr = WrappingNode(startToken).finishUnaryExpression(
                token['value'], expr)
            # 'delete identifier' is forbidden in strict mode.
            if (self.strict and expr.operator == 'delete'
                    and expr.argument.type == Syntax.Identifier):
                self.tolerateError(Messages.StrictDelete)
            self.isAssignmentTarget = self.isBindingElement = false
        else:
            expr = self.parsePostfixExpression()
        return expr
def binaryPrecedence(self, token, allowIn):
prec = 0
typ = token['type']
if (typ != Token.Punctuator and typ != Token.Keyword):
return 0
val = token['value']
if val == 'in' and not allowIn:
return 0
return PRECEDENCE.get(val, 0)
# 11.5 Multiplicative Operators
# 11.6 Additive Operators
# 11.7 Bitwise Shift Operators
# 11.8 Relational Operators
# 11.9 Equality Operators
# 11.10 Binary Bitwise Operators
# 11.11 Binary Logical Operators
    def parseBinaryExpression(self):
        """Parse a binary-operator expression (11.5-11.11) with an
        operator-precedence (shift/reduce) loop.

        ``stack`` interleaves operand and operator-token entries;
        ``markers`` tracks start tokens for node location wrapping.
        """
        marker = self.lookahead
        left = self.inheritCoverGrammar(self.parseUnaryExpression)
        token = self.lookahead
        prec = self.binaryPrecedence(token, self.state['allowIn'])
        if (prec == 0):
            # Not followed by a binary operator: plain unary expression.
            return left
        self.isAssignmentTarget = self.isBindingElement = false
        token['prec'] = prec
        self.lex()
        markers = [marker, self.lookahead]
        right = self.isolateCoverGrammar(self.parseUnaryExpression)
        stack = [left, token, right]
        while True:
            prec = self.binaryPrecedence(self.lookahead, self.state['allowIn'])
            if not prec > 0:
                break
            # Reduce: make a binary expression from the three topmost entries.
            while ((len(stack) > 2)
                   and (prec <= stack[len(stack) - 2]['prec'])):
                right = stack.pop()
                operator = stack.pop()['value']
                left = stack.pop()
                markers.pop()
                expr = WrappingNode(
                    markers[len(markers) - 1]).finishBinaryExpression(
                        operator, left, right)
                stack.append(expr)
            # Shift
            token = self.lex()
            token['prec'] = prec
            stack.append(token)
            markers.append(self.lookahead)
            expr = self.isolateCoverGrammar(self.parseUnaryExpression)
            stack.append(expr)
        # Final reduce to clean-up the stack.
        i = len(stack) - 1
        expr = stack[i]
        markers.pop()
        while (i > 1):
            expr = WrappingNode(markers.pop()).finishBinaryExpression(
                stack[i - 1]['value'], stack[i - 2], expr)
            i -= 2
        return expr
# 11.12 Conditional Operator
    def parseConditionalExpression(self):
        """Parse a conditional (ternary) expression (11.12): a binary
        expression optionally followed by ``? consequent : alternate``."""
        startToken = self.lookahead
        expr = self.inheritCoverGrammar(self.parseBinaryExpression)
        if (self.match('?')):
            self.lex()
            # 'in' is always allowed inside the consequent branch.
            previousAllowIn = self.state['allowIn']
            self.state['allowIn'] = true
            consequent = self.isolateCoverGrammar(
                self.parseAssignmentExpression)
            self.state['allowIn'] = previousAllowIn
            self.expect(':')
            alternate = self.isolateCoverGrammar(
                self.parseAssignmentExpression)
            expr = WrappingNode(startToken).finishConditionalExpression(
                expr, consequent, alternate)
            self.isAssignmentTarget = self.isBindingElement = false
        return expr
# [ES6] 14.2 Arrow Function
def parseConciseBody(self):
if (self.match('{')):
return self.parseFunctionSourceElements()
return self.isolateCoverGrammar(self.parseAssignmentExpression)
    def checkPatternParam(self, options, param):
        """Recursively validate the identifiers bound by a parameter
        pattern, delegating each leaf identifier to ``validateParam``
        (which records duplicates/restricted names in ``options``)."""
        typ = param.type
        if typ == Syntax.Identifier:
            self.validateParam(options, param, param.name)
        elif typ == Syntax.RestElement:
            self.checkPatternParam(options, param.argument)
        elif typ == Syntax.AssignmentPattern:
            self.checkPatternParam(options, param.left)
        elif typ == Syntax.ArrayPattern:
            for i in xrange(len(param.elements)):
                if (param.elements[i] != null):
                    self.checkPatternParam(options, param.elements[i])
        else:
            assert typ == Syntax.ObjectPattern, 'Invalid type'
            for i in xrange(len(param.properties)):
                self.checkPatternParam(options, param.properties[i]['value'])
def reinterpretAsCoverFormalsList(self, expr):
defaults = []
defaultCount = 0
params = [expr]
typ = expr.type
if typ == Syntax.Identifier:
pass
elif typ == PlaceHolders.ArrowParameterPlaceHolder:
params = expr.params
else:
return null
options = {'paramSet': {}}
le = len(params)
for i in xrange(le):
param = params[i]
if param.type == Syntax.AssignmentPattern:
params[i] = param.left
defaults.append(param.right)
defaultCount += 1
self.checkPatternParam(options, param.left)
else:
self.checkPatternParam(options, param)
params[i] = param
defaults.append(null)
if (options.get('message') == Messages.StrictParamDupe):
token = options.get(
'stricted') if self.strict else options['firstRestricted']
self.throwUnexpectedToken(token, options.get('message'))
if (defaultCount == 0):
defaults = []
return {
'params': params,
'defaults': defaults,
'stricted': options['stricted'],
'firstRestricted': options['firstRestricted'],
'message': options.get('message')
}
    def parseArrowFunctionExpression(self, options, node):
        """Parse an ES6 arrow function body after its parameter list.

        Arrow functions are not supported by this ES5.1 parser: raises
        immediately; the code after the ``raise`` is the retained
        (unreachable) upstream port.
        """
        raise Ecma51NotSupported('ArrowFunctionExpression')
        if (self.hasLineTerminator):
            self.tolerateUnexpectedToken(self.lookahead)
        self.expect('=>')
        previousStrict = self.strict
        body = self.parseConciseBody()
        if (self.strict and options['firstRestricted']):
            self.throwUnexpectedToken(options['firstRestricted'],
                                      options.get('message'))
        if (self.strict and options['stricted']):
            self.tolerateUnexpectedToken(options['stricted'],
                                         options['message'])
        self.strict = previousStrict
        return node.finishArrowFunctionExpression(
            options['params'], options['defaults'], body,
            body.type != Syntax.BlockStatement)
# 11.13 Assignment Operators
    def parseAssignmentExpression(self):
        """Parse an assignment expression (11.13): a conditional
        expression optionally followed by an assignment operator and a
        right-hand side.

        An arrow-parameter placeholder or ``=>`` raises
        ``Ecma51NotSupported``; the code after that ``raise`` (inside the
        first ``if``) is the retained, unreachable upstream arrow path.
        """
        startToken = self.lookahead
        token = self.lookahead
        expr = self.parseConditionalExpression()
        if (expr.type == PlaceHolders.ArrowParameterPlaceHolder
                or self.match('=>')):
            raise Ecma51NotSupported('ArrowFunctionExpression')
            self.isAssignmentTarget = self.isBindingElement = false
            lis = self.reinterpretAsCoverFormalsList(expr)
            if (lis):
                self.firstCoverInitializedNameError = null
                return self.parseArrowFunctionExpression(
                    lis, WrappingNode(startToken))
            return expr
        if (self.matchAssign()):
            if (not self.isAssignmentTarget):
                self.tolerateError(Messages.InvalidLHSInAssignment)
            # 11.13.1
            if (self.strict and expr.type == Syntax.Identifier
                    and isRestrictedWord(expr.name)):
                self.tolerateUnexpectedToken(token,
                                             Messages.StrictLHSAssignment)
            if (not self.match('=')):
                self.isAssignmentTarget = self.isBindingElement = false
            else:
                # Plain '=': the left side may be a destructuring pattern.
                self.reinterpretExpressionAsPattern(expr)
            token = self.lex()
            right = self.isolateCoverGrammar(self.parseAssignmentExpression)
            expr = WrappingNode(startToken).finishAssignmentExpression(
                token['value'], expr, right)
            self.firstCoverInitializedNameError = null
        return expr
# 11.14 Comma Operator
    def parseExpression(self):
        """Parse a full expression (11.14), folding comma-separated
        assignment expressions into a SequenceExpression."""
        startToken = self.lookahead
        expr = self.isolateCoverGrammar(self.parseAssignmentExpression)
        if (self.match(',')):
            expressions = [expr]
            while (self.startIndex < self.length):
                if (not self.match(',')):
                    break
                self.lex()
                expressions.append(
                    self.isolateCoverGrammar(self.parseAssignmentExpression))
            expr = WrappingNode(startToken).finishSequenceExpression(
                expressions)
        return expr
# 12.1 Block
    def parseStatementListItem(self):
        """Parse one statement-list item: declarations (``const``/``let``,
        ``function``, optional ``pyimport`` extension) are dispatched
        here; ES6 module and class declarations raise; everything else
        falls through to ``parseStatement``."""
        if (self.lookahead['type'] == Token.Keyword):
            val = (self.lookahead['value'])
            if val == 'export':
                raise Ecma51NotSupported('ExportDeclaration')
            elif val == 'import':
                raise Ecma51NotSupported('ImportDeclaration')
            elif val == 'const' or val == 'let':
                return self.parseLexicalDeclaration({
                    'inFor': false
                })
            elif val == 'function':
                return self.parseFunctionDeclaration(Node())
            elif val == 'class':
                raise Ecma51NotSupported('ClassDeclaration')
            elif ENABLE_PYIMPORT and val == 'pyimport':  # <<<<< MODIFIED HERE
                return self.parsePyimportStatement()
        return self.parseStatement()
    def parsePyimportStatement(self):
        """Parse the js2py extension ``pyimport name;`` (only available
        when the module-level ENABLE_PYIMPORT flag is set)."""
        if not ENABLE_PYIMPORT:
            raise Ecma51NotSupported('PyimportStatement')
        n = Node()
        self.lex()
        n.finishPyimport(self.parseVariableIdentifier())
        self.consumeSemicolon()
        return n
def parseStatementList(self):
list = []
while (self.startIndex < self.length):
if (self.match('}')):
break
list.append(self.parseStatementListItem())
return list
def parseBlock(self):
node = Node()
self.expect('{')
block = self.parseStatementList()
self.expect('}')
return node.finishBlockStatement(block)
# 12.2 Variable Statement
    def parseVariableIdentifier(self):
        """Parse an identifier token into an Identifier node.

        In strict mode a reserved-word keyword is only tolerated (error
        recorded); any other non-identifier token raises.
        """
        node = Node()
        token = self.lex()
        if (token['type'] != Token.Identifier):
            if (self.strict and token['type'] == Token.Keyword
                    and isStrictModeReservedWord(token['value'])):
                self.tolerateUnexpectedToken(token,
                                             Messages.StrictReservedWord)
            else:
                self.throwUnexpectedToken(token)
        return node.finishIdentifier(token['value'])
    def parseVariableDeclaration(self):
        """Parse one ``var`` declarator: a binding pattern with an
        optional ``= init`` (mandatory when the pattern is not a plain
        identifier)."""
        init = null
        node = Node()
        d = self.parsePattern()
        # 12.2.1
        if (self.strict and isRestrictedWord(d.name)):
            self.tolerateError(Messages.StrictVarName)
        if (self.match('=')):
            self.lex()
            init = self.isolateCoverGrammar(self.parseAssignmentExpression)
        elif (d.type != Syntax.Identifier):
            # Destructuring declarations require an initializer.
            self.expect('=')
        return node.finishVariableDeclarator(d, init)
def parseVariableDeclarationList(self):
lis = []
while True:
lis.append(self.parseVariableDeclaration())
if (not self.match(',')):
break
self.lex()
if not (self.startIndex < self.length):
break
return lis
def parseVariableStatement(self, node):
self.expectKeyword('var')
declarations = self.parseVariableDeclarationList()
self.consumeSemicolon()
return node.finishVariableDeclaration(declarations)
    def parseLexicalBinding(self, kind, options):
        """Parse one ``let``/``const`` declarator.

        ``const`` requires an initializer (except directly before ``in``
        in a for-in head); non-identifier patterns outside a for head
        also require one.
        """
        init = null
        node = Node()
        d = self.parsePattern()
        # 12.2.1
        if (self.strict and d.type == Syntax.Identifier
                and isRestrictedWord(d.name)):
            self.tolerateError(Messages.StrictVarName)
        if (kind == 'const'):
            if (not self.matchKeyword('in')):
                self.expect('=')
                init = self.isolateCoverGrammar(self.parseAssignmentExpression)
        elif ((not options['inFor'] and d.type != Syntax.Identifier)
              or self.match('=')):
            self.expect('=')
            init = self.isolateCoverGrammar(self.parseAssignmentExpression)
        return node.finishVariableDeclarator(d, init)
def parseBindingList(self, kind, options):
list = []
while True:
list.append(self.parseLexicalBinding(kind, options))
if (not self.match(',')):
break
self.lex()
if not (self.startIndex < self.length):
break
return list
    def parseLexicalDeclaration(self, options):
        """Parse a ``let`` or ``const`` declaration statement; ``options``
        carries the ``inFor`` flag down to the individual bindings."""
        node = Node()
        kind = self.lex()['value']
        assert kind == 'let' or kind == 'const', 'Lexical declaration must be either let or const'
        declarations = self.parseBindingList(kind, options)
        self.consumeSemicolon()
        return node.finishLexicalDeclaration(declarations, kind)
    def parseRestElement(self):
        """Parse an ES6 rest parameter ``...name``.

        Not supported by this ES5.1 parser: raises immediately; the code
        after the ``raise`` is the retained (unreachable) upstream port.
        """
        raise Ecma51NotSupported('RestElement')
        node = Node()
        self.lex()
        if (self.match('{')):
            self.throwError(Messages.ObjectPatternAsRestParameter)
        param = self.parseVariableIdentifier()
        if (self.match('=')):
            self.throwError(Messages.DefaultRestParameter)
        if (not self.match(')')):
            self.throwError(Messages.ParameterAfterRestParameter)
        return node.finishRestElement(param)
# 12.3 Empty Statement
def parseEmptyStatement(self, node):
self.expect(';')
return node.finishEmptyStatement()
# 12.4 Expression Statement
def parseExpressionStatement(self, node):
expr = self.parseExpression()
self.consumeSemicolon()
return node.finishExpressionStatement(expr)
# 12.5 If statement
def parseIfStatement(self, node):
self.expectKeyword('if')
self.expect('(')
test = self.parseExpression()
self.expect(')')
consequent = self.parseStatement()
if (self.matchKeyword('else')):
self.lex()
alternate = self.parseStatement()
else:
alternate = null
return node.finishIfStatement(test, consequent, alternate)
# 12.6 Iteration Statements
    def parseDoWhileStatement(self, node):
        """Parse ``do body while (test);`` — the trailing semicolon is
        optional (consumed only if present)."""
        self.expectKeyword('do')
        # Mark iteration context so break/continue are legal in the body.
        oldInIteration = self.state['inIteration']
        self.state['inIteration'] = true
        body = self.parseStatement()
        self.state['inIteration'] = oldInIteration
        self.expectKeyword('while')
        self.expect('(')
        test = self.parseExpression()
        self.expect(')')
        if (self.match(';')):
            self.lex()
        return node.finishDoWhileStatement(body, test)
    def parseWhileStatement(self, node):
        """Parse ``while (test) body`` with the iteration flag set around
        the body so break/continue are legal inside it."""
        self.expectKeyword('while')
        self.expect('(')
        test = self.parseExpression()
        self.expect(')')
        oldInIteration = self.state['inIteration']
        self.state['inIteration'] = true
        body = self.parseStatement()
        self.state['inIteration'] = oldInIteration
        return node.finishWhileStatement(test, body)
    def parseForStatement(self, node):
        """Parse both classic ``for (init; test; update)`` and
        ``for (x in obj)`` statements (12.6).

        The head may start with ``var``, ``let``/``const``, an arbitrary
        expression, or be empty.  Whether a for-in was parsed is detected
        afterwards via ``'left' not in locals()`` — ``left`` is only bound
        on the for-in paths.
        """
        previousAllowIn = self.state['allowIn']
        init = test = update = null
        self.expectKeyword('for')
        self.expect('(')
        if (self.match(';')):
            # Empty initializer.
            self.lex()
        else:
            if (self.matchKeyword('var')):
                init = Node()
                self.lex()
                # Disallow 'in' as a binary operator while parsing the
                # declarators so 'for (var x in y)' is not misparsed.
                self.state['allowIn'] = false
                init = init.finishVariableDeclaration(
                    self.parseVariableDeclarationList())
                self.state['allowIn'] = previousAllowIn
                if (len(init.declarations) == 1 and self.matchKeyword('in')):
                    self.lex()
                    left = init
                    right = self.parseExpression()
                    init = null
                else:
                    self.expect(';')
            elif (self.matchKeyword('const') or self.matchKeyword('let')):
                init = Node()
                kind = self.lex()['value']
                self.state['allowIn'] = false
                declarations = self.parseBindingList(kind, {'inFor': true})
                self.state['allowIn'] = previousAllowIn
                if (len(declarations) == 1 and declarations[0].init == null
                        and self.matchKeyword('in')):
                    init = init.finishLexicalDeclaration(declarations, kind)
                    self.lex()
                    left = init
                    right = self.parseExpression()
                    init = null
                else:
                    self.consumeSemicolon()
                    init = init.finishLexicalDeclaration(declarations, kind)
            else:
                initStartToken = self.lookahead
                self.state['allowIn'] = false
                init = self.inheritCoverGrammar(self.parseAssignmentExpression)
                self.state['allowIn'] = previousAllowIn
                if (self.matchKeyword('in')):
                    if (not self.isAssignmentTarget):
                        self.tolerateError(Messages.InvalidLHSInForIn)
                    self.lex()
                    self.reinterpretExpressionAsPattern(init)
                    left = init
                    right = self.parseExpression()
                    init = null
                else:
                    if (self.match(',')):
                        initSeq = [init]
                        while (self.match(',')):
                            self.lex()
                            initSeq.append(
                                self.isolateCoverGrammar(
                                    self.parseAssignmentExpression))
                        init = WrappingNode(
                            initStartToken).finishSequenceExpression(initSeq)
                    self.expect(';')
        # 'left' is bound only on the for-in paths above.
        if ('left' not in locals()):
            if (not self.match(';')):
                test = self.parseExpression()
            self.expect(';')
            if (not self.match(')')):
                update = self.parseExpression()
        self.expect(')')
        oldInIteration = self.state['inIteration']
        self.state['inIteration'] = true
        body = self.isolateCoverGrammar(self.parseStatement)
        self.state['inIteration'] = oldInIteration
        return node.finishForStatement(init, test, update, body) if (
            'left' not in locals()) else node.finishForInStatement(
                left, right, body)
# 12.7 The continue statement
    def parseContinueStatement(self, node):
        """Parse ``continue [label];`` (12.7), validating that it appears
        inside an iteration and that any label is known."""
        label = null
        self.expectKeyword('continue')
        # Optimize the most common form: 'continue;'.
        # NOTE(review): this peeks at startIndex while parseBreakStatement
        # peeks at lastIndex for the same purpose — confirm which is the
        # intended position before unifying.
        if ord(self.source[self.startIndex]) == 0x3B:
            self.lex()
            if (not self.state['inIteration']):
                self.throwError(Messages.IllegalContinue)
            return node.finishContinueStatement(null)
        if (self.hasLineTerminator):
            # ASI: a newline terminates the bare 'continue'.
            if (not self.state['inIteration']):
                self.throwError(Messages.IllegalContinue)
            return node.finishContinueStatement(null)
        if (self.lookahead['type'] == Token.Identifier):
            label = self.parseVariableIdentifier()
            key = '$' + label.name
            if not key in self.state['labelSet']:  # todo make sure its correct!
                self.throwError(Messages.UnknownLabel, label.name)
        self.consumeSemicolon()
        if (label == null and not self.state['inIteration']):
            self.throwError(Messages.IllegalContinue)
        return node.finishContinueStatement(label)
# 12.8 The break statement
    def parseBreakStatement(self, node):
        """Parse ``break [label];`` (12.8), validating that it appears
        inside an iteration or switch and that any label is known."""
        label = null
        self.expectKeyword('break')
        # Catch the very common case first: immediately a semicolon (U+003B).
        if (ord(self.source[self.lastIndex]) == 0x3B):
            self.lex()
            if (not (self.state['inIteration'] or self.state['inSwitch'])):
                self.throwError(Messages.IllegalBreak)
            return node.finishBreakStatement(null)
        if (self.hasLineTerminator):
            # ASI: a newline terminates the bare 'break'.
            if (not (self.state['inIteration'] or self.state['inSwitch'])):
                self.throwError(Messages.IllegalBreak)
            return node.finishBreakStatement(null)
        if (self.lookahead['type'] == Token.Identifier):
            label = self.parseVariableIdentifier()
            key = '$' + label.name
            if not (key in self.state['labelSet']):
                self.throwError(Messages.UnknownLabel, label.name)
        self.consumeSemicolon()
        if (label == null
                and not (self.state['inIteration'] or self.state['inSwitch'])):
            self.throwError(Messages.IllegalBreak)
        return node.finishBreakStatement(label)
    # 12.9 The return statement
    def parseReturnStatement(self, node):
        """Parse a ``return`` statement; only legal inside a function body.

        Handles the ASI special case: a line terminator directly after
        ``return`` terminates the statement with no argument.
        """
        argument = null
        self.expectKeyword('return')
        if (not self.state['inFunctionBody']):
            self.tolerateError(Messages.IllegalReturn)
        # 'return' followed by a space and an identifier is very common.
        if (ord(self.source[self.lastIndex]) == 0x20):  # 0x20 == ' '
            if (isIdentifierStart(self.source[self.lastIndex + 1])):
                argument = self.parseExpression()
                self.consumeSemicolon()
                return node.finishReturnStatement(argument)
        if (self.hasLineTerminator):
            # HACK: ASI — a newline after 'return' means 'return;'.
            return node.finishReturnStatement(null)
        if (not self.match(';')):
            # Anything other than ';', '}' or EOF is the return value.
            if (not self.match('}') and self.lookahead['type'] != Token.EOF):
                argument = self.parseExpression()
        self.consumeSemicolon()
        return node.finishReturnStatement(argument)
# 12.10 The with statement
def parseWithStatement(self, node):
if (self.strict):
self.tolerateError(Messages.StrictModeWith)
self.expectKeyword('with')
self.expect('(')
obj = self.parseExpression()
self.expect(')')
body = self.parseStatement()
return node.finishWithStatement(obj, body)
# 12.10 The swith statement
def parseSwitchCase(self):
consequent = []
node = Node()
if (self.matchKeyword('default')):
self.lex()
test = null
else:
self.expectKeyword('case')
test = self.parseExpression()
self.expect(':')
while (self.startIndex < self.length):
if (self.match('}') or self.matchKeyword('default')
or self.matchKeyword('case')):
break
statement = self.parseStatementListItem()
consequent.append(statement)
return node.finishSwitchCase(test, consequent)
def parseSwitchStatement(self, node):
self.expectKeyword('switch')
self.expect('(')
discriminant = self.parseExpression()
self.expect(')')
self.expect('{')
cases = []
if (self.match('}')):
self.lex()
return node.finishSwitchStatement(discriminant, cases)
oldInSwitch = self.state['inSwitch']
self.state['inSwitch'] = true
defaultFound = false
while (self.startIndex < self.length):
if (self.match('}')):
break
clause = self.parseSwitchCase()
if (clause.test == null):
if (defaultFound):
self.throwError(Messages.MultipleDefaultsInSwitch)
defaultFound = true
cases.append(clause)
self.state['inSwitch'] = oldInSwitch
self.expect('}')
return node.finishSwitchStatement(discriminant, cases)
# 12.13 The throw statement
def parseThrowStatement(self, node):
self.expectKeyword('throw')
if (self.hasLineTerminator):
self.throwError(Messages.NewlineAfterThrow)
argument = self.parseExpression()
self.consumeSemicolon()
return node.finishThrowStatement(argument)
# 12.14 The try statement
def parseCatchClause(self):
node = Node()
self.expectKeyword('catch')
self.expect('(')
if (self.match(')')):
self.throwUnexpectedToken(self.lookahead)
param = self.parsePattern()
# 12.14.1
if (self.strict and isRestrictedWord(param.name)):
self.tolerateError(Messages.StrictCatchVariable)
self.expect(')')
body = self.parseBlock()
return node.finishCatchClause(param, body)
def parseTryStatement(self, node):
handler = null
finalizer = null
self.expectKeyword('try')
block = self.parseBlock()
if (self.matchKeyword('catch')):
handler = self.parseCatchClause()
if (self.matchKeyword('finally')):
self.lex()
finalizer = self.parseBlock()
if (not handler and not finalizer):
self.throwError(Messages.NoCatchOrFinally)
return node.finishTryStatement(block, handler, finalizer)
    # 12.15 The debugger statement
    def parseDebuggerStatement(self, node):
        """Parse the ``debugger`` statement (a no-op in this parser)."""
        self.expectKeyword('debugger')
        self.consumeSemicolon()
        return node.finishDebuggerStatement()
# 12 Statements
def parseStatement(self):
typ = self.lookahead['type']
if (typ == Token.EOF):
self.throwUnexpectedToken(self.lookahead)
if (typ == Token.Punctuator and self.lookahead['value'] == '{'):
return self.parseBlock()
self.isAssignmentTarget = self.isBindingElement = true
node = Node()
val = self.lookahead['value']
if (typ == Token.Punctuator):
if val == ';':
return self.parseEmptyStatement(node)
elif val == '(':
return self.parseExpressionStatement(node)
elif (typ == Token.Keyword):
if val == 'break':
return self.parseBreakStatement(node)
elif val == 'continue':
return self.parseContinueStatement(node)
elif val == 'debugger':
return self.parseDebuggerStatement(node)
elif val == 'do':
return self.parseDoWhileStatement(node)
elif val == 'for':
return self.parseForStatement(node)
elif val == 'function':
return self.parseFunctionDeclaration(node)
elif val == 'if':
return self.parseIfStatement(node)
elif val == 'return':
return self.parseReturnStatement(node)
elif val == 'switch':
return self.parseSwitchStatement(node)
elif val == 'throw':
return self.parseThrowStatement(node)
elif val == 'try':
return self.parseTryStatement(node)
elif val == 'var':
return self.parseVariableStatement(node)
elif val == 'while':
return self.parseWhileStatement(node)
elif val == 'with':
return self.parseWithStatement(node)
expr = self.parseExpression()
# 12.12 Labelled Statements
if ((expr.type == Syntax.Identifier) and self.match(':')):
self.lex()
key = '$' + expr.name
if key in self.state['labelSet']:
self.throwError(Messages.Redeclaration, 'Label', expr.name)
self.state['labelSet'][key] = true
labeledBody = self.parseStatement()
del self.state['labelSet'][key]
return node.finishLabeledStatement(expr, labeledBody)
self.consumeSemicolon()
return node.finishExpressionStatement(expr)
    # 13 Function Definition
    def parseFunctionSourceElements(self):
        """Parse a function body block, honouring the directive prologue.

        Leading string-literal statements are scanned for 'use strict';
        label/iteration/switch state is saved and reset for the body and
        restored on exit. Returns a BlockStatement node.
        """
        body = []
        node = Node()
        firstRestricted = None
        self.expect('{')
        # Directive prologue: consume leading string-literal statements.
        while (self.startIndex < self.length):
            if (self.lookahead['type'] != Token.StringLiteral):
                break
            token = self.lookahead
            statement = self.parseStatementListItem()
            body.append(statement)
            if (statement.expression.type != Syntax.Literal):
                # this is not directive
                break
            # Raw text between the quotes, taken straight from the source.
            directive = self.source[token['start'] + 1:token['end'] - 1]
            if (directive == 'use strict'):
                self.strict = true
                if (firstRestricted):
                    # An earlier octal literal becomes illegal now that we
                    # know the function is strict.
                    self.tolerateUnexpectedToken(firstRestricted,
                                                 Messages.StrictOctalLiteral)
            else:
                if (not firstRestricted and token.get('octal')):
                    firstRestricted = token
        # Save enclosing state: a function body resets all of it.
        oldLabelSet = self.state['labelSet']
        oldInIteration = self.state['inIteration']
        oldInSwitch = self.state['inSwitch']
        oldInFunctionBody = self.state['inFunctionBody']
        oldParenthesisCount = self.state['parenthesizedCount']
        self.state['labelSet'] = {}
        self.state['inIteration'] = false
        self.state['inSwitch'] = false
        self.state['inFunctionBody'] = true
        self.state['parenthesizedCount'] = 0
        while (self.startIndex < self.length):
            if (self.match('}')):
                break
            body.append(self.parseStatementListItem())
        self.expect('}')
        self.state['labelSet'] = oldLabelSet
        self.state['inIteration'] = oldInIteration
        self.state['inSwitch'] = oldInSwitch
        self.state['inFunctionBody'] = oldInFunctionBody
        self.state['parenthesizedCount'] = oldParenthesisCount
        return node.finishBlockStatement(body)
def validateParam(self, options, param, name):
key = '$' + name
if (self.strict):
if (isRestrictedWord(name)):
options['stricted'] = param
options['message'] = Messages.StrictParamName
if key in options['paramSet']:
options['stricted'] = param
options['message'] = Messages.StrictParamDupe
elif (not options['firstRestricted']):
if (isRestrictedWord(name)):
options['firstRestricted'] = param
options['message'] = Messages.StrictParamName
elif (isStrictModeReservedWord(name)):
options['firstRestricted'] = param
options['message'] = Messages.StrictReservedWord
elif key in options['paramSet']:
options['firstRestricted'] = param
options['message'] = Messages.StrictParamDupe
options['paramSet'][key] = true
def parseParam(self, options):
token = self.lookahead
de = None
if (token['value'] == '...'):
param = self.parseRestElement()
self.validateParam(options, param.argument, param.argument.name)
options['params'].append(param)
options['defaults'].append(null)
return false
param = self.parsePatternWithDefault()
self.validateParam(options, token, token['value'])
if (param.type == Syntax.AssignmentPattern):
de = param.right
param = param.left
options['defaultCount'] += 1
options['params'].append(param)
options['defaults'].append(de)
return not self.match(')')
def parseParams(self, firstRestricted):
options = {
'params': [],
'defaultCount': 0,
'defaults': [],
'firstRestricted': firstRestricted
}
self.expect('(')
if (not self.match(')')):
options['paramSet'] = {}
while (self.startIndex < self.length):
if (not self.parseParam(options)):
break
self.expect(',')
self.expect(')')
if (options['defaultCount'] == 0):
options['defaults'] = []
return {
'params': options['params'],
'defaults': options['defaults'],
'stricted': options.get('stricted'),
'firstRestricted': options.get('firstRestricted'),
'message': options.get('message')
}
    def parseFunctionDeclaration(self, node, identifierIsOptional=None):
        """Parse a ``function`` declaration (ECMA 13).

        When *identifierIsOptional* is truthy the name may be omitted.
        Strict-mode checks on the name/parameters are deferred via
        firstRestricted/stricted until the body is parsed, because a
        'use strict' directive inside the body applies retroactively.
        """
        d = null
        params = []
        defaults = []
        message = None
        firstRestricted = None
        self.expectKeyword('function')
        if (identifierIsOptional or not self.match('(')):
            token = self.lookahead
            d = self.parseVariableIdentifier()
            if (self.strict):
                if (isRestrictedWord(token['value'])):
                    self.tolerateUnexpectedToken(token,
                                                 Messages.StrictFunctionName)
            else:
                # Not yet strict: remember a potential violation for later.
                if (isRestrictedWord(token['value'])):
                    firstRestricted = token
                    message = Messages.StrictFunctionName
                elif (isStrictModeReservedWord(token['value'])):
                    firstRestricted = token
                    message = Messages.StrictReservedWord
        tmp = self.parseParams(firstRestricted)
        params = tmp['params']
        defaults = tmp['defaults']
        stricted = tmp.get('stricted')
        firstRestricted = tmp['firstRestricted']
        if (tmp.get('message')):
            message = tmp['message']
        previousStrict = self.strict
        body = self.parseFunctionSourceElements()
        # The body has been parsed, so the final strictness is known.
        if (self.strict and firstRestricted):
            self.throwUnexpectedToken(firstRestricted, message)
        if (self.strict and stricted):
            self.tolerateUnexpectedToken(stricted, message)
        self.strict = previousStrict
        return node.finishFunctionDeclaration(d, params, defaults, body)
def parseFunctionExpression(self):
id = null
params = []
defaults = []
node = Node()
firstRestricted = None
message = None
self.expectKeyword('function')
if (not self.match('(')):
token = self.lookahead
id = self.parseVariableIdentifier()
if (self.strict):
if (isRestrictedWord(token['value'])):
self.tolerateUnexpectedToken(token,
Messages.StrictFunctionName)
else:
if (isRestrictedWord(token['value'])):
firstRestricted = token
message = Messages.StrictFunctionName
elif (isStrictModeReservedWord(token['value'])):
firstRestricted = token
message = Messages.StrictReservedWord
tmp = self.parseParams(firstRestricted)
params = tmp['params']
defaults = tmp['defaults']
stricted = tmp.get('stricted')
firstRestricted = tmp['firstRestricted']
if (tmp.get('message')):
message = tmp['message']
previousStrict = self.strict
body = self.parseFunctionSourceElements()
if (self.strict and firstRestricted):
self.throwUnexpectedToken(firstRestricted, message)
if (self.strict and stricted):
self.tolerateUnexpectedToken(stricted, message)
self.strict = previousStrict
return node.finishFunctionExpression(id, params, defaults, body)
    # todo Translate parse class functions!
    def parseClassExpression(self):
        """ES6 class expressions are not supported by this ES5.1 parser."""
        raise Ecma51NotSupported('ClassExpression')
    def parseClassDeclaration(self):
        """ES6 class declarations are not supported by this ES5.1 parser."""
        raise Ecma51NotSupported('ClassDeclaration')
    # 14 Program
    def parseScriptBody(self):
        """Parse top-level statements, honouring the directive prologue.

        Returns the list of statement nodes that form the Program body.
        """
        body = []
        firstRestricted = None
        # Directive prologue: consume leading string-literal statements.
        while (self.startIndex < self.length):
            token = self.lookahead
            if (token['type'] != Token.StringLiteral):
                break
            statement = self.parseStatementListItem()
            body.append(statement)
            if (statement.expression.type != Syntax.Literal):
                # this is not directive
                break
            # Raw text between the quotes, taken straight from the source.
            directive = self.source[token['start'] + 1:token['end'] - 1]
            if (directive == 'use strict'):
                self.strict = true
                if (firstRestricted):
                    # Earlier octal literal becomes illegal in strict mode.
                    self.tolerateUnexpectedToken(firstRestricted,
                                                 Messages.StrictOctalLiteral)
            else:
                if (not firstRestricted and token.get('octal')):
                    firstRestricted = token
        while (self.startIndex < self.length):
            statement = self.parseStatementListItem()
            # istanbul ignore if
            if (statement is None):
                break
            body.append(statement)
        return body
    def parseProgram(self):
        """Parse the whole script and return its Program node."""
        self.peek()  # prime the first lookahead token
        node = Node()
        body = self.parseScriptBody()
        return node.finishProgram(body)
# DONE!!!
def parse(self, code, options={}):
if options:
raise NotImplementedError(
'Options not implemented! You can only use default settings.')
self.clean()
self.source = unicode(
code
) + ' \n ; //END' # I have to add it in order not to check for EOF every time
self.index = 0
self.lineNumber = 1 if len(self.source) > 0 else 0
self.lineStart = 0
self.startIndex = self.index
self.startLineNumber = self.lineNumber
self.startLineStart = self.lineStart
self.length = len(self.source)
self.lookahead = null
self.state = {
'allowIn': true,
'labelSet': {},
'inFunctionBody': false,
'inIteration': false,
'inSwitch': false,
'lastCommentStart': -1,
'curlyStack': [],
'parenthesizedCount': None
}
self.sourceType = 'script'
self.strict = false
try:
program = self.parseProgram()
except Ecma51NotSupported as e:
raise self.createError(self.lineNumber, self.lastIndex, unicode(e))
return node_to_dict(program)
def parse(javascript_code):
    """Return the syntax tree of *javascript_code*.

    Convenience wrapper around ``PyJsParser().parse``.
    """
    return PyJsParser().parse(javascript_code)
if __name__ == '__main__':
    # Quick self-test / micro-benchmark: parse a file when test_path is
    # set, otherwise a one-line sample, and report throughput.
    import time
    test_path = None
    if test_path:
        # 'with' guarantees the handle is closed even if read() raises.
        with open(test_path, 'rb') as f:
            x = f.read()
    else:
        x = 'var $ = "Hello!"'
    p = PyJsParser()
    t = time.time()
    res = p.parse(x)
    # Tiny epsilon avoids division by zero on extremely fast runs.
    dt = time.time() - t + 0.000000001
    if test_path:
        print(len(res))
    else:
        pprint(res)
    print()
    print('Parsed everyting in', round(dt, 5), 'seconds.')
    print('Thats %d characters per second' % int(len(x) / dt))