diff --git a/libs/js2py/base.py b/libs/js2py/base.py index 67c80d599..cf1eca08d 100644 --- a/libs/js2py/base.py +++ b/libs/js2py/base.py @@ -5,6 +5,7 @@ import re from .translators.friendly_nodes import REGEXP_CONVERTER from .utils.injector import fix_js_args from types import FunctionType, ModuleType, GeneratorType, BuiltinFunctionType, MethodType, BuiltinMethodType +from math import floor, log10 import traceback try: import numpy @@ -603,15 +604,7 @@ class PyJs(object): elif typ == 'Boolean': return Js('true') if self.value else Js('false') elif typ == 'Number': #or self.Class=='Number': - if self.is_nan(): - return Js('NaN') - elif self.is_infinity(): - sign = '-' if self.value < 0 else '' - return Js(sign + 'Infinity') - elif isinstance(self.value, - long) or self.value.is_integer(): # dont print .0 - return Js(unicode(int(self.value))) - return Js(unicode(self.value)) # accurate enough + return Js(unicode(js_dtoa(self.value))) elif typ == 'String': return self else: #object @@ -1046,7 +1039,7 @@ def PyJsComma(a, b): return b -from .internals.simplex import JsException as PyJsException +from .internals.simplex import JsException as PyJsException, js_dtoa import pyjsparser pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError('SyntaxError', msg) diff --git a/libs/js2py/evaljs.py b/libs/js2py/evaljs.py index 3f5eeee53..64eea5c4c 100644 --- a/libs/js2py/evaljs.py +++ b/libs/js2py/evaljs.py @@ -116,10 +116,12 @@ def eval_js(js): def eval_js6(js): + """Just like eval_js but with experimental support for js6 via babel.""" return eval_js(js6_to_js5(js)) def translate_js6(js): + """Just like translate_js but with experimental support for js6 via babel.""" return translate_js(js6_to_js5(js)) diff --git a/libs/js2py/internals/base.py b/libs/js2py/internals/base.py index ec277c64d..a02a21229 100644 --- a/libs/js2py/internals/base.py +++ b/libs/js2py/internals/base.py @@ -3,15 +3,19 @@ import re import datetime -from desc import * -from simplex import * -from conversions import * -import six -from pyjsparser import PyJsParser -from itertools import izip +from .desc import * +from .simplex import * +from .conversions import * + +from pyjsparser import PyJsParser + +import six +if six.PY2: + from itertools import izip +else: + izip = zip + -from conversions import * -from simplex import * def Type(obj): diff --git a/libs/js2py/internals/byte_trans.py b/libs/js2py/internals/byte_trans.py index 87fab4b4e..e32bcb1e2 100644 --- a/libs/js2py/internals/byte_trans.py +++ b/libs/js2py/internals/byte_trans.py @@ -1,8 +1,8 @@ -from code import Code -from simplex import MakeError -from opcodes import * -from operations import * -from trans_utils import * +from .code import Code +from .simplex import MakeError +from .opcodes import * +from .operations import * +from .trans_utils import * SPECIAL_IDENTIFIERS = {'true', 'false', 'this'} @@ -465,10 +465,11 @@ class ByteCodeGenerator: self.emit('LOAD_OBJECT', tuple(data)) def Program(self, body, **kwargs): + old_tape_len = len(self.exe.tape) self.emit('LOAD_UNDEFINED') self.emit(body) # add function tape ! - self.exe.tape = self.function_declaration_tape + self.exe.tape + self.exe.tape = self.exe.tape[:old_tape_len] + self.function_declaration_tape + self.exe.tape[old_tape_len:] def Pyimport(self, imp, **kwargs): raise NotImplementedError( @@ -735,17 +736,17 @@ def main(): # # } a.emit(d) - print a.declared_vars - print a.exe.tape - print len(a.exe.tape) + print(a.declared_vars) + print(a.exe.tape) + print(len(a.exe.tape)) a.exe.compile() def log(this, args): - print args[0] + print(args[0]) return 999 - print a.exe.run(a.exe.space.GlobalObj) + print(a.exe.run(a.exe.space.GlobalObj)) if __name__ == '__main__': diff --git a/libs/js2py/internals/code.py b/libs/js2py/internals/code.py index 6bd6739fd..9af0e602b 100644 --- a/libs/js2py/internals/code.py +++ b/libs/js2py/internals/code.py @@ -1,16 +1,17 @@ -from opcodes import * -from space import * -from base import * +from .opcodes import * +from .space import * +from .base import * class Code: '''Can generate, store and run sequence of ops representing js code''' - def __init__(self, is_strict=False): + def __init__(self, is_strict=False, debug_mode=False): self.tape = [] self.compiled = False self.label_locs = None self.is_strict = is_strict + self.debug_mode = debug_mode self.contexts = [] self.current_ctx = None @@ -22,6 +23,10 @@ class Code: self.GLOBAL_THIS = None self.space = None + # dbg + self.ctx_depth = 0 + + def get_new_label(self): self._label_count += 1 return self._label_count @@ -74,21 +79,35 @@ class Code: # 0=normal, 1=return, 2=jump_outside, 3=errors # execute_fragment_under_context returns: # (return_value, typ, return_value/jump_loc/py_error) - # ctx.stack must be len 1 and its always empty after the call. + # IMPARTANT: It is guaranteed that the length of the ctx.stack is unchanged. ''' old_curr_ctx = self.current_ctx + self.ctx_depth += 1 + old_stack_len = len(ctx.stack) + old_ret_len = len(self.return_locs) + old_ctx_len = len(self.contexts) try: self.current_ctx = ctx return self._execute_fragment_under_context( ctx, start_label, end_label) except JsException as err: - # undo the things that were put on the stack (if any) - # don't worry, I know the recovery is possible through try statement and for this reason try statement - # has its own context and stack so it will not delete the contents of the outer stack - del ctx.stack[:] + if self.debug_mode: + self._on_fragment_exit("js errors") + # undo the things that were put on the stack (if any) to ensure a proper error recovery + del ctx.stack[old_stack_len:] + del self.return_locs[old_ret_len:] + del self.contexts[old_ctx_len :] return undefined, 3, err finally: + self.ctx_depth -= 1 self.current_ctx = old_curr_ctx + assert old_stack_len == len(ctx.stack) + + def _get_dbg_indent(self): + return self.ctx_depth * ' ' + + def _on_fragment_exit(self, mode): + print(self._get_dbg_indent() + 'ctx exit (%s)' % mode) def _execute_fragment_under_context(self, ctx, start_label, end_label): start, end = self.label_locs[start_label], self.label_locs[end_label] @@ -97,16 +116,20 @@ class Code: entry_level = len(self.contexts) # for e in self.tape[start:end]: # print e - + if self.debug_mode: + print(self._get_dbg_indent() + 'ctx entry (from:%d, to:%d)' % (start, end)) while loc < len(self.tape): - #print loc, self.tape[loc] if len(self.contexts) == entry_level and loc >= end: + if self.debug_mode: + self._on_fragment_exit('normal') assert loc == end - assert len(ctx.stack) == ( - 1 + initial_len), 'Stack change must be equal to +1!' + delta_stack = len(ctx.stack) - initial_len + assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack return ctx.stack.pop(), 0, None # means normal return # execute instruction + if self.debug_mode: + print(self._get_dbg_indent() + str(loc), self.tape[loc]) status = self.tape[loc].eval(ctx) # check status for special actions @@ -116,9 +139,10 @@ class Code: if len(self.contexts) == entry_level: # check if jumped outside of the fragment and break if so if not start <= loc < end: - assert len(ctx.stack) == ( - 1 + initial_len - ), 'Stack change must be equal to +1!' + if self.debug_mode: + self._on_fragment_exit('jump outside loc:%d label:%d' % (loc, status)) + delta_stack = len(ctx.stack) - initial_len + assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack return ctx.stack.pop(), 2, status # jump outside continue @@ -137,7 +161,10 @@ class Code: # return: (None, None) else: if len(self.contexts) == entry_level: - assert len(ctx.stack) == 1 + initial_len + if self.debug_mode: + self._on_fragment_exit('return') + delta_stack = len(ctx.stack) - initial_len + assert delta_stack == +1, 'Stack change must be equal to +1! got %d' % delta_stack return undefined, 1, ctx.stack.pop( ) # return signal return_value = ctx.stack.pop() @@ -149,6 +176,8 @@ class Code: continue # next instruction loc += 1 + if self.debug_mode: + self._on_fragment_exit('internal error - unexpected end of tape, will crash') assert False, 'Remember to add NOP at the end!' def run(self, ctx, starting_loc=0): @@ -156,7 +185,8 @@ class Code: self.current_ctx = ctx while loc < len(self.tape): # execute instruction - #print loc, self.tape[loc] + if self.debug_mode: + print(loc, self.tape[loc]) status = self.tape[loc].eval(ctx) # check status for special actions diff --git a/libs/js2py/internals/constructors/jsfunction.py b/libs/js2py/internals/constructors/jsfunction.py index 9728fb382..d62731ac3 100644 --- a/libs/js2py/internals/constructors/jsfunction.py +++ b/libs/js2py/internals/constructors/jsfunction.py @@ -42,6 +42,7 @@ def executable_code(code_str, space, global_context=True): space.byte_generator.emit('LABEL', skip) space.byte_generator.emit('NOP') space.byte_generator.restore_state() + space.byte_generator.exe.compile( start_loc=old_tape_len ) # dont read the code from the beginning, dont be stupid! @@ -71,5 +72,5 @@ def _eval(this, args): def log(this, args): - print ' '.join(map(to_string, args)) + print(' '.join(map(to_string, args))) return undefined diff --git a/libs/js2py/internals/conversions.py b/libs/js2py/internals/conversions.py index b90a427d2..8b2c7c308 100644 --- a/libs/js2py/internals/conversions.py +++ b/libs/js2py/internals/conversions.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals # Type Conversions. to_type. All must return PyJs subclass instance -from simplex import * +from .simplex import * def to_primitive(self, hint=None): @@ -73,14 +73,7 @@ def to_string(self): elif typ == 'Boolean': return 'true' if self else 'false' elif typ == 'Number': # or self.Class=='Number': - if is_nan(self): - return 'NaN' - elif is_infinity(self): - sign = '-' if self < 0 else '' - return sign + 'Infinity' - elif int(self) == self: # integer value! - return unicode(int(self)) - return unicode(self) # todo make it print exactly like node.js + return js_dtoa(self) else: # object return to_string(to_primitive(self, 'String')) diff --git a/libs/js2py/internals/fill_space.py b/libs/js2py/internals/fill_space.py index 9aa9c4d21..329c8b28f 100644 --- a/libs/js2py/internals/fill_space.py +++ b/libs/js2py/internals/fill_space.py @@ -1,29 +1,22 @@ from __future__ import unicode_literals -from base import Scope -from func_utils import * -from conversions import * +from .base import Scope +from .func_utils import * +from .conversions import * import six -from prototypes.jsboolean import BooleanPrototype -from prototypes.jserror import ErrorPrototype -from prototypes.jsfunction import FunctionPrototype -from prototypes.jsnumber import NumberPrototype -from prototypes.jsobject import ObjectPrototype -from prototypes.jsregexp import RegExpPrototype -from prototypes.jsstring import StringPrototype -from prototypes.jsarray import ArrayPrototype -import prototypes.jsjson as jsjson -import prototypes.jsutils as jsutils +from .prototypes.jsboolean import BooleanPrototype +from .prototypes.jserror import ErrorPrototype +from .prototypes.jsfunction import FunctionPrototype +from .prototypes.jsnumber import NumberPrototype +from .prototypes.jsobject import ObjectPrototype +from .prototypes.jsregexp import RegExpPrototype +from .prototypes.jsstring import StringPrototype +from .prototypes.jsarray import ArrayPrototype +from .prototypes import jsjson +from .prototypes import jsutils + +from .constructors import jsnumber, jsstring, jsarray, jsboolean, jsregexp, jsmath, jsobject, jsfunction, jsconsole -from constructors import jsnumber -from constructors import jsstring -from constructors import jsarray -from constructors import jsboolean -from constructors import jsregexp -from constructors import jsmath -from constructors import jsobject -from constructors import jsfunction -from constructors import jsconsole def fill_proto(proto, proto_class, space): @@ -155,7 +148,10 @@ def fill_space(space, byte_generator): j = easy_func(creator, space) j.name = unicode(typ) - j.prototype = space.ERROR_TYPES[typ] + + set_protected(j, 'prototype', space.ERROR_TYPES[typ]) + + set_non_enumerable(space.ERROR_TYPES[typ], 'constructor', j) def new_create(args, space): message = get_arg(args, 0) @@ -178,6 +174,7 @@ def fill_space(space, byte_generator): setattr(space, err_type_name + u'Prototype', extra_err) error_constructors[err_type_name] = construct_constructor( err_type_name) + assert space.TypeErrorPrototype is not None # RegExp diff --git a/libs/js2py/internals/func_utils.py b/libs/js2py/internals/func_utils.py index 3c0b8d576..58dfef9ee 100644 --- a/libs/js2py/internals/func_utils.py +++ b/libs/js2py/internals/func_utils.py @@ -1,5 +1,5 @@ -from simplex import * -from conversions import * +from .simplex import * +from .conversions import * import six if six.PY3: diff --git a/libs/js2py/internals/opcodes.py b/libs/js2py/internals/opcodes.py index 0f3127db7..15c57ccd1 100644 --- a/libs/js2py/internals/opcodes.py +++ b/libs/js2py/internals/opcodes.py @@ -1,5 +1,5 @@ -from operations import * -from base import get_member, get_member_dot, PyJsFunction, Scope +from .operations import * +from .base import get_member, get_member_dot, PyJsFunction, Scope class OP_CODE(object): diff --git a/libs/js2py/internals/operations.py b/libs/js2py/internals/operations.py index d9875088c..35b901794 100644 --- a/libs/js2py/internals/operations.py +++ b/libs/js2py/internals/operations.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals -from simplex import * -from conversions import * +from .simplex import * +from .conversions import * # ------------------------------------------------------------------------------ # Unary operations diff --git a/libs/js2py/internals/prototypes/jsstring.py b/libs/js2py/internals/prototypes/jsstring.py index b56246e25..be38802ef 100644 --- a/libs/js2py/internals/prototypes/jsstring.py +++ b/libs/js2py/internals/prototypes/jsstring.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from ..conversions import * from ..func_utils import * -from jsregexp import RegExpExec +from .jsregexp import RegExpExec DIGS = set(u'0123456789') WHITE = u"\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF" diff --git a/libs/js2py/internals/seval.py b/libs/js2py/internals/seval.py index c4404ab77..cd8ea50fa 100644 --- a/libs/js2py/internals/seval.py +++ b/libs/js2py/internals/seval.py @@ -1,11 +1,9 @@ import pyjsparser -from space import Space -import fill_space -from byte_trans import ByteCodeGenerator -from code import Code -from simplex import MakeError -import sys -sys.setrecursionlimit(100000) +from .space import Space +from . import fill_space +from .byte_trans import ByteCodeGenerator +from .code import Code +from .simplex import * pyjsparser.parser.ENABLE_JS2PY_ERRORS = lambda msg: MakeError(u'SyntaxError', unicode(msg)) @@ -16,8 +14,8 @@ def get_js_bytecode(js): a.emit(d) return a.exe.tape -def eval_js_vm(js): - a = ByteCodeGenerator(Code()) +def eval_js_vm(js, debug=False): + a = ByteCodeGenerator(Code(debug_mode=debug)) s = Space() a.exe.space = s s.exe = a.exe @@ -26,7 +24,10 @@ def eval_js_vm(js): a.emit(d) fill_space.fill_space(s, a) - # print a.exe.tape + if debug: + from pprint import pprint + pprint(a.exe.tape) + print() a.exe.compile() return a.exe.run(a.exe.space.GlobalObj) diff --git a/libs/js2py/internals/simplex.py b/libs/js2py/internals/simplex.py index b05f6174e..4cd247eb2 100644 --- a/libs/js2py/internals/simplex.py +++ b/libs/js2py/internals/simplex.py @@ -1,6 +1,10 @@ from __future__ import unicode_literals import six - +if six.PY3: + basestring = str + long = int + xrange = range + unicode = str #Undefined class PyJsUndefined(object): @@ -75,7 +79,7 @@ def is_callable(self): def is_infinity(self): - return self == float('inf') or self == -float('inf') + return self == Infinity or self == -Infinity def is_nan(self): @@ -114,7 +118,7 @@ class JsException(Exception): return self.mes.to_string().value else: if self.throw is not None: - from conversions import to_string + from .conversions import to_string return to_string(self.throw) else: return self.typ + ': ' + self.message @@ -131,3 +135,26 @@ def value_from_js_exception(js_exception, space): return js_exception.throw else: return space.NewError(js_exception.typ, js_exception.message) + + +def js_dtoa(number): + if is_nan(number): + return u'NaN' + elif is_infinity(number): + if number > 0: + return u'Infinity' + return u'-Infinity' + elif number == 0.: + return u'0' + elif abs(number) < 1e-6 or abs(number) >= 1e21: + frac, exponent = unicode(repr(float(number))).split('e') + # Remove leading zeros from the exponent. + exponent = int(exponent) + return frac + ('e' if exponent < 0 else 'e+') + unicode(exponent) + elif abs(number) < 1e-4: # python starts to return exp notation while we still want the prec + frac, exponent = unicode(repr(float(number))).split('e-') + base = u'0.' + u'0' * (int(exponent) - 1) + frac.lstrip('-').replace('.', '') + return base if number > 0. else u'-' + base + elif isinstance(number, long) or number.is_integer(): # dont print .0 + return unicode(int(number)) + return unicode(repr(number)) # python representation should be equivalent. diff --git a/libs/js2py/internals/space.py b/libs/js2py/internals/space.py index 7283070cd..cb2e77ae0 100644 --- a/libs/js2py/internals/space.py +++ b/libs/js2py/internals/space.py @@ -1,5 +1,5 @@ -from base import * -from simplex import * +from .base import * +from .simplex import * class Space(object): diff --git a/libs/js2py/internals/trans_utils.py b/libs/js2py/internals/trans_utils.py index 235b46a85..f99c09945 100644 --- a/libs/js2py/internals/trans_utils.py +++ b/libs/js2py/internals/trans_utils.py @@ -1,3 +1,10 @@ +import six +if six.PY3: + basestring = str + long = int + xrange = range + unicode = str + def to_key(literal_or_identifier): ''' returns string representation of this object''' if literal_or_identifier['type'] == 'Identifier': diff --git a/libs/js2py/prototypes/jsfunction.py b/libs/js2py/prototypes/jsfunction.py index f9598a317..2ed417e0d 100644 --- a/libs/js2py/prototypes/jsfunction.py +++ b/libs/js2py/prototypes/jsfunction.py @@ -6,8 +6,6 @@ if six.PY3: xrange = range unicode = str -# todo fix apply and bind - class FunctionPrototype: def toString(): @@ -41,6 +39,7 @@ class FunctionPrototype: return this.call(obj, args) def bind(thisArg): + arguments_ = arguments target = this if not target.is_callable(): raise this.MakeError( @@ -48,5 +47,5 @@ class FunctionPrototype: if len(arguments) <= 1: args = () else: - args = tuple([arguments[e] for e in xrange(1, len(arguments))]) + args = tuple([arguments_[e] for e in xrange(1, len(arguments_))]) return this.PyJsBoundFunction(target, thisArg, args) diff --git a/libs/js2py/translators/translating_nodes.py b/libs/js2py/translators/translating_nodes.py index 286714f91..0ae93dd9a 100644 --- a/libs/js2py/translators/translating_nodes.py +++ b/libs/js2py/translators/translating_nodes.py @@ -345,7 +345,7 @@ def BlockStatement(type, body): body) # never returns empty string! In the worst case returns pass\n -def ExpressionStatement(type, expression, **ommit): +def ExpressionStatement(type, expression): return trans(expression) + '\n' # end expression space with new line diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py index f28cfcba6..fc82bf9b5 100644 --- a/libs/pysubs2/cli.py +++ b/libs/pysubs2/cli.py @@ -163,3 +163,13 @@ class Pysubs2CLI(object): elif args.transform_framerate is not None: in_fps, out_fps = args.transform_framerate subs.transform_framerate(in_fps, out_fps) + + +def __main__(): + cli = Pysubs2CLI() + rv = cli(sys.argv[1:]) + sys.exit(rv) + + +if __name__ == "__main__": + __main__() diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py index 08738eb5c..2f95ccf44 100644 --- a/libs/pysubs2/common.py +++ b/libs/pysubs2/common.py @@ -17,12 +17,14 @@ class Color(_Color): return _Color.__new__(cls, r, g, b, a) #: Version of the pysubs2 library. -VERSION = "0.2.1" +VERSION = "0.2.3" PY3 = sys.version_info.major == 3 if PY3: text_type = str + binary_string_type = bytes else: text_type = unicode + binary_string_type = str diff --git a/libs/pysubs2/formats.py b/libs/pysubs2/formats.py index 03fba8e60..5c25a6e96 100644 --- a/libs/pysubs2/formats.py +++ b/libs/pysubs2/formats.py @@ -3,7 +3,7 @@ from .microdvd import MicroDVDFormat from .subrip import SubripFormat from .jsonformat import JSONFormat from .substation import SubstationFormat -from .txt_generic import TXTGenericFormat, MPL2Format +from .mpl2 import MPL2Format from .exceptions import * #: Dict mapping file extensions to format identifiers. @@ -13,7 +13,6 @@ FILE_EXTENSION_TO_FORMAT_IDENTIFIER = { ".ssa": "ssa", ".sub": "microdvd", ".json": "json", - ".txt": "txt_generic", } #: Dict mapping format identifiers to implementations (FormatBase subclasses). @@ -23,7 +22,6 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = { "ssa": SubstationFormat, "microdvd": MicroDVDFormat, "json": JSONFormat, - "txt_generic": TXTGenericFormat, "mpl2": MPL2Format, } diff --git a/libs/pysubs2/ssastyle.py b/libs/pysubs2/ssastyle.py index 522f8ce0d..2fcadc7ed 100644 --- a/libs/pysubs2/ssastyle.py +++ b/libs/pysubs2/ssastyle.py @@ -78,7 +78,7 @@ class SSAStyle(object): s += "%rpx " % self.fontsize if self.bold: s += "bold " if self.italic: s += "italic " - s += "'%s'>" % self.fontname + s += "{!r}>".format(self.fontname) if not PY3: s = s.encode("utf-8") return s diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/subrip.py index 7fa3f29b2..fea4eade6 100644 --- a/libs/pysubs2/subrip.py +++ b/libs/pysubs2/subrip.py @@ -46,8 +46,16 @@ class SubripFormat(FormatBase): following_lines[-1].append(line) def prepare_text(lines): + # Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s) + # followed by number line and timestamp line of the next subtitle. Fixes issue #11. + if (len(lines) >= 2 + and all(re.match("\s*$", line) for line in lines[:-1]) + and re.match("\s*\d+\s*$", lines[-1])): + return "" + + # Handle the general case. s = "".join(lines).strip() - s = re.sub(r"\n* *\d+ *$", "", s) # strip number of next subtitle + s = re.sub(r"\n+ *\d+ *$", "", s) # strip number of next subtitle s = re.sub(r"< *i *>", r"{\i1}", s) s = re.sub(r"< */ *i *>", r"{\i0}", s) s = re.sub(r"< *s *>", r"{\s1}", s) diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/substation.py index fc4172a49..f810a4776 100644 --- a/libs/pysubs2/substation.py +++ b/libs/pysubs2/substation.py @@ -4,7 +4,7 @@ from numbers import Number from .formatbase import FormatBase from .ssaevent import SSAEvent from .ssastyle import SSAStyle -from .common import text_type, Color +from .common import text_type, Color, PY3, binary_string_type from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7) @@ -150,14 +150,7 @@ class SubstationFormat(FormatBase): if format_ == "ass": return ass_rgba_to_color(v) else: - try: - return ssa_rgb_to_color(v) - except ValueError: - try: - return ass_rgba_to_color(v) - except: - return Color(255, 255, 255, 0) - + return ssa_rgb_to_color(v) elif f in {"bold", "underline", "italic", "strikeout"}: return v == "-1" elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}: @@ -229,7 +222,7 @@ class SubstationFormat(FormatBase): for k, v in subs.aegisub_project.items(): print(k, v, sep=": ", file=fp) - def field_to_string(f, v): + def field_to_string(f, v, line): if f in {"start", "end"}: return ms_to_timestamp(v) elif f == "marked": @@ -240,23 +233,31 @@ class SubstationFormat(FormatBase): return "-1" if v else "0" elif isinstance(v, (text_type, Number)): return text_type(v) + elif not PY3 and isinstance(v, binary_string_type): + # A convenience feature, see issue #12 - accept non-unicode strings + # when they are ASCII; this is useful in Python 2, especially for non-text + # fields like style names, where requiring Unicode type seems too stringent + if all(ord(c) < 128 for c in v): + return text_type(v) + else: + raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line)) elif isinstance(v, Color): if format_ == "ass": return color_to_ass_rgba(v) else: return color_to_ssa_rgb(v) else: - raise TypeError("Unexpected type when writing a SubStation field") + raise TypeError("Unexpected type when writing a SubStation field {!r} for line {!r}".format(f, line)) print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp) print(STYLE_FORMAT_LINE[format_], file=fp) for name, sty in subs.styles.items(): - fields = [field_to_string(f, getattr(sty, f)) for f in STYLE_FIELDS[format_]] + fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]] print("Style: %s" % name, *fields, sep=",", file=fp) print("\n[Events]", file=fp) print(EVENT_FORMAT_LINE[format_], file=fp) for ev in subs.events: - fields = [field_to_string(f, getattr(ev, f)) for f in EVENT_FIELDS[format_]] + fields = [field_to_string(f, getattr(ev, f), ev) for f in EVENT_FIELDS[format_]] print(ev.type, end=": ", file=fp) print(*fields, sep=",", file=fp) diff --git a/libs/subliminal/providers/subscenter.py b/libs/subliminal/providers/subscenter.py index d9c902b7a..f9bf3c8cb 100644 --- a/libs/subliminal/providers/subscenter.py +++ b/libs/subliminal/providers/subscenter.py @@ -75,7 +75,7 @@ class SubsCenterSubtitle(Subtitle): class SubsCenterProvider(Provider): """SubsCenter Provider.""" languages = {Language.fromalpha2(l) for l in ['he']} - server_url = 'http://www.subscenter.biz/he/' + server_url = 'http://www.subscenter.org/he/' subtitle_class = SubsCenterSubtitle def __init__(self, username=None, password=None): diff --git a/libs/subliminal/subtitle.py b/libs/subliminal/subtitle.py index 726b28e37..5c2c789b2 100644 --- a/libs/subliminal/subtitle.py +++ b/libs/subliminal/subtitle.py @@ -258,4 +258,4 @@ def fix_line_ending(content): :rtype: bytes """ - return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n') + return content.replace(b'\r\n', b'\n') diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py index a7292ff52..b4fe6ad8f 100644 --- a/libs/subliminal_patch/http.py +++ b/libs/subliminal_patch/http.py @@ -10,6 +10,8 @@ import logging import requests import xmlrpclib import dns.resolver +import ipaddress +import re from requests import exceptions from urllib3.util import connection @@ -17,7 +19,13 @@ from retry.api import retry_call from exceptions import APIThrottled from dogpile.cache.api import NO_VALUE from subliminal.cache import region -from cfscrape import CloudflareScraper +from subliminal_patch.pitcher import pitchers +from cloudscraper import CloudScraper + +try: + import brotli +except: + pass try: from urlparse import urlparse @@ -55,43 +63,111 @@ class CertifiSession(TimeoutSession): self.verify = pem_file -class CFSession(CloudflareScraper): - def __init__(self): - super(CFSession, self).__init__() +class NeedsCaptchaException(Exception): + pass + + +class CFSession(CloudScraper): + def __init__(self, *args, **kwargs): + super(CFSession, self).__init__(*args, **kwargs) self.debug = os.environ.get("CF_DEBUG", False) + def _request(self, method, url, *args, **kwargs): + ourSuper = super(CloudScraper, self) + resp = ourSuper.request(method, url, *args, **kwargs) + + if resp.headers.get('Content-Encoding') == 'br': + if self.allow_brotli and resp._content: + resp._content = brotli.decompress(resp.content) + else: + logging.warning('Brotli content detected, But option is disabled, we will not continue.') + return resp + + # Debug request + if self.debug: + self.debugRequest(resp) + + # Check if Cloudflare anti-bot is on + try: + if self.isChallengeRequest(resp): + if resp.request.method != 'GET': + # Work around if the initial request is not a GET, + # Supersede with a GET then re-request the original METHOD. + CloudScraper.request(self, 'GET', resp.url) + resp = ourSuper.request(method, url, *args, **kwargs) + else: + # Solve Challenge + resp = self.sendChallengeResponse(resp, **kwargs) + + except ValueError, e: + if e.message == "Captcha": + parsed_url = urlparse(url) + domain = parsed_url.netloc + # solve the captcha + site_key = re.search(r'data-sitekey="(.+?)"', resp.content).group(1) + challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content).group(1) + challenge_ray = re.search(r'data-ray="(.+?)"', resp.content).group(1) + if not all([site_key, challenge_s, challenge_ray]): + raise Exception("cf: Captcha site-key not found!") + + pitcher = pitchers.get_pitcher()("cf: %s" % domain, resp.request.url, site_key, + user_agent=self.headers["User-Agent"], + cookies=self.cookies.get_dict(), + is_invisible=True) + + parsed_url = urlparse(resp.url) + logger.info("cf: %s: Solving captcha", domain) + result = pitcher.throw() + if not result: + raise Exception("cf: Couldn't solve captcha!") + + submit_url = '{}://{}/cdn-cgi/l/chk_captcha'.format(parsed_url.scheme, domain) + method = resp.request.method + + cloudflare_kwargs = { + 'allow_redirects': False, + 'headers': {'Referer': resp.url}, + 'params': OrderedDict( + [ + ('s', challenge_s), + ('g-recaptcha-response', result) + ] + ) + } + + return CloudScraper.request(self, method, submit_url, **cloudflare_kwargs) + + return resp + def request(self, method, url, *args, **kwargs): parsed_url = urlparse(url) domain = parsed_url.netloc - cache_key = "cf_data2_%s" % domain + cache_key = "cf_data3_%s" % domain if not self.cookies.get("cf_clearance", "", domain=domain): cf_data = region.get(cache_key) if cf_data is not NO_VALUE: - cf_cookies, user_agent, hdrs = cf_data + cf_cookies, hdrs = cf_data logger.debug("Trying to use old cf data for %s: %s", domain, cf_data) for cookie, value in cf_cookies.iteritems(): self.cookies.set(cookie, value, domain=domain) - self._hdrs = hdrs - self._ua = user_agent - self.headers['User-Agent'] = self._ua + self.headers = hdrs - ret = super(CFSession, self).request(method, url, *args, **kwargs) + ret = self._request(method, url, *args, **kwargs) - if self._was_cf: - self._was_cf = False - logger.debug("We've hit CF, trying to store previous data") - try: - cf_data = self.get_cf_live_tokens(domain) - except: - logger.debug("Couldn't get CF live tokens for re-use. Cookies: %r", self.cookies) - pass - else: - if cf_data != region.get(cache_key) and cf_data[0]["cf_clearance"]: + try: + cf_data = self.get_cf_live_tokens(domain) + except: + pass + else: + if cf_data and "cf_clearance" in cf_data[0] and cf_data[0]["cf_clearance"]: + if cf_data != region.get(cache_key): logger.debug("Storing cf data for %s: %s", domain, cf_data) region.set(cache_key, cf_data) + elif cf_data[0]["cf_clearance"]: + logger.debug("CF Live tokens not updated") return ret @@ -109,7 +185,7 @@ class CFSession(CloudflareScraper): ("__cfduid", self.cookies.get("__cfduid", "", domain=cookie_domain)), ("cf_clearance", self.cookies.get("cf_clearance", "", domain=cookie_domain)) ])), - self._ua, self._hdrs + self.headers ) @@ -240,42 +316,47 @@ def patch_create_connection(): global _custom_resolver, _custom_resolver_ips, dns_cache host, port = address - __custom_resolver_ips = os.environ.get("dns_resolvers", None) + try: + ipaddress.ip_address(unicode(host)) + except (ipaddress.AddressValueError, ValueError): + __custom_resolver_ips = os.environ.get("dns_resolvers", None) - # resolver ips changed in the meantime? - if __custom_resolver_ips != _custom_resolver_ips: - _custom_resolver = None - _custom_resolver_ips = __custom_resolver_ips - dns_cache = {} + # resolver ips changed in the meantime? + if __custom_resolver_ips != _custom_resolver_ips: + _custom_resolver = None + _custom_resolver_ips = __custom_resolver_ips + dns_cache = {} - custom_resolver = _custom_resolver + custom_resolver = _custom_resolver - if not custom_resolver: - if _custom_resolver_ips: - logger.debug("DNS: Trying to use custom DNS resolvers: %s", _custom_resolver_ips) - custom_resolver = dns.resolver.Resolver(configure=False) - custom_resolver.lifetime = 8.0 - try: - custom_resolver.nameservers = json.loads(_custom_resolver_ips) - except: - logger.debug("DNS: Couldn't load custom DNS resolvers: %s", _custom_resolver_ips) + if not custom_resolver: + if _custom_resolver_ips: + logger.debug("DNS: Trying to use custom DNS resolvers: %s", _custom_resolver_ips) + custom_resolver = dns.resolver.Resolver(configure=False) + custom_resolver.lifetime = os.environ.get("dns_resolvers_timeout", 8.0) + try: + custom_resolver.nameservers = json.loads(_custom_resolver_ips) + except: + logger.debug("DNS: Couldn't load custom DNS resolvers: %s", _custom_resolver_ips) + else: + _custom_resolver = custom_resolver + + if custom_resolver: + if host in dns_cache: + ip = dns_cache[host] + logger.debug("DNS: Using %s=%s from cache", host, ip) + return _orig_create_connection((ip, port), *args, **kwargs) else: - _custom_resolver = custom_resolver - - if custom_resolver: - if host in dns_cache: - ip = dns_cache[host] - logger.debug("DNS: Using %s=%s from cache", host, ip) - return _orig_create_connection((ip, port), *args, **kwargs) - else: - try: - ip = custom_resolver.query(host)[0].address - logger.debug("DNS: Resolved %s to %s using %s", host, ip, custom_resolver.nameservers) - dns_cache[host] = ip - except dns.exception.DNSException: - logger.warning("DNS: Couldn't resolve %s with DNS: %s", host, custom_resolver.nameservers) - raise + try: + ip = custom_resolver.query(host)[0].address + logger.debug("DNS: Resolved %s to %s using %s", host, ip, custom_resolver.nameservers) + dns_cache[host] = ip + return _orig_create_connection((ip, port), *args, **kwargs) + except dns.exception.DNSException: + logger.warning("DNS: Couldn't resolve %s with DNS: %s", host, custom_resolver.nameservers) + raise + logger.debug("DNS: Falling back to default DNS or IP on %s", host) return _orig_create_connection((host, port), *args, **kwargs) patch_create_connection._sz_patched = True diff --git a/libs/subliminal_patch/providers/subscene.py b/libs/subliminal_patch/providers/subscene.py index 0025470cf..2dc38b691 100644 --- a/libs/subliminal_patch/providers/subscene.py +++ b/libs/subliminal_patch/providers/subscene.py @@ -5,16 +5,11 @@ import logging import os import time import inflect -import cfscrape -from random import randint from zipfile import ZipFile - from babelfish import language_converters from guessit import guessit -from dogpile.cache.api import NO_VALUE from subliminal import Episode, ProviderError -from subliminal.cache import region from subliminal.utils import sanitize_release_group from subliminal_patch.http import RetryingCFSession from subliminal_patch.providers import Provider diff --git a/libs/subliminal_patch/providers/titlovi.py b/libs/subliminal_patch/providers/titlovi.py index b076680e9..17d87ac32 100644 --- a/libs/subliminal_patch/providers/titlovi.py +++ b/libs/subliminal_patch/providers/titlovi.py @@ -225,7 +225,8 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): # page link page_link = self.server_url + sub.a.attrs['href'] # subtitle language - match = lang_re.search(sub.select_one('.lang').attrs['src']) + _lang = sub.select_one('.lang') + match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('cfsrc', ''))) if match: try: # decode language diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py index daa922359..5ee53a46a 100644 --- a/libs/subliminal_patch/subtitle.py +++ b/libs/subliminal_patch/subtitle.py @@ -123,7 +123,8 @@ class Subtitle(Subtitle_): # http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages if self.language.alpha3 == 'zho': - encodings.extend(['cp936', 'gb2312', 'cp950', 'gb18030', 'big5', 'big5hkscs']) + encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5', + 'big5hkscs', 'utf-16']) elif self.language.alpha3 == 'jpn': encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ]) diff --git a/libs/subscene_api/subscene.py b/libs/subscene_api/subscene.py index e2b14ea26..5b53a8c95 100644 --- a/libs/subscene_api/subscene.py +++ b/libs/subscene_api/subscene.py @@ -244,7 +244,7 @@ def get_first_film(soup, section, year=None, session=None): def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact): - soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session) + soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "search", term), session=session) if "Subtitle search by" in str(soup): rows = soup.find("table").tbody.find_all("tr") diff --git a/libs/subzero/constants.py b/libs/subzero/constants.py index b89da8199..cdfa242a6 100644 --- a/libs/subzero/constants.py +++ b/libs/subzero/constants.py @@ -2,7 +2,8 @@ OS_PLEX_USERAGENT = 'plexapp.com v9.0' -DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'subzero', 'libfilebot', 'cfscrape'] +DEPENDENCY_MODULE_NAMES = ['subliminal', 'subliminal_patch', 'enzyme', 'guessit', 'subzero', 'libfilebot', + 'cloudscraper'] PERSONAL_MEDIA_IDENTIFIER = "com.plexapp.agents.none" PLUGIN_IDENTIFIER_SHORT = "subzero" PLUGIN_IDENTIFIER = "com.plexapp.agents.%s" % PLUGIN_IDENTIFIER_SHORT