Initial commit.

Louis Vézina 2018-10-20 22:53:12 -04:00
parent b9a987b57d
commit d61bdfcd4f
19 changed files with 130 additions and 427 deletions

View file

@@ -3,6 +3,7 @@ from get_argv import config_dir
 import sqlite3
 import os
-from subliminal import provider_manager
+from subliminal_patch import provider_manager
+import collections
 def load_providers():
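
Note: nearly every application-level hunk in this commit swaps a subliminal import for the bundled subliminal_patch fork, which exposes the same names. A minimal sketch of how such a drop-in swap could be made tolerant of a missing fork (the try/except fallback is an illustrative assumption; this commit imports subliminal_patch unconditionally):

    # Prefer the patched fork; fall back to upstream if it is absent.
    try:
        from subliminal_patch import provider_manager
    except ImportError:
        from subliminal import provider_manager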

View file

@@ -10,7 +10,9 @@ import time
 from datetime import datetime, timedelta
 from babelfish import Language
-from subliminal import region, scan_video, Video, download_best_subtitles, compute_score, save_subtitles, AsyncProviderPool, score, list_subtitles, download_subtitles
+from subliminal_patch import region, scan_video, Video, download_best_subtitles, compute_score, save_subtitles, AsyncProviderPool, score, list_subtitles, download_subtitles
-from subliminal.subtitle import get_subtitle_path
+from subliminal_patch.subtitle import get_subtitle_path
 from get_languages import language_from_alpha3, alpha2_from_alpha3, alpha3_from_alpha2
 from bs4 import UnicodeDammit
 from get_settings import get_general_settings, pp_replace, path_replace, path_replace_movie, path_replace_reverse, path_replace_reverse_movie

View file

@@ -6,6 +6,7 @@ import enzyme
 import babelfish
 import logging
-from subliminal import core
+from subliminal_patch import core
 import sqlite3
 import ast
 import langdetect

View file

@@ -21,15 +21,14 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 # found in the LICENSE file.
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.6.3"
-__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson"
+__version__ = "4.6.0"
+__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
 __license__ = "MIT"
 __all__ = ['BeautifulSoup']
 import os
 import re
-import sys
 import traceback
 import warnings
@@ -83,46 +82,14 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
-    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  **kwargs):
-        """Constructor.
-        :param markup: A string or a file-like object representing
-         markup to be parsed.
-        :param features: Desirable features of the parser to be used. This
-         may be the name of a specific parser ("lxml", "lxml-xml",
-         "html.parser", or "html5lib") or it may be the type of markup
-         to be used ("html", "html5", "xml"). It's recommended that you
-         name a specific parser, so that Beautiful Soup gives you the
-         same results across platforms and virtual environments.
-        :param builder: A specific TreeBuilder to use instead of looking one
-         up based on `features`. You shouldn't need to use this.
-        :param parse_only: A SoupStrainer. Only parts of the document
-         matching the SoupStrainer will be considered. This is useful
-         when parsing part of a document that would otherwise be too
-         large to fit into memory.
-        :param from_encoding: A string indicating the encoding of the
-         document to be parsed. Pass this in if Beautiful Soup is
-         guessing wrongly about the document's encoding.
-        :param exclude_encodings: A list of strings indicating
-         encodings known to be wrong. Pass this in if you don't know
-         the document's encoding but you know Beautiful Soup's guess is
-         wrong.
-        :param kwargs: For backwards compatibility purposes, the
-         constructor accepts certain keyword arguments used in
-         Beautiful Soup 3. None of these arguments do anything in
-         Beautiful Soup 4 and there's no need to actually pass keyword
-         arguments into the constructor.
-        """
+        """The Soup object is initialized as the 'root tag', and the
+        provided markup (which can be a string or a file-like object)
+        is fed into the underlying parser."""
         if 'convertEntities' in kwargs:
             warnings.warn(
@@ -204,35 +171,14 @@ class BeautifulSoup(Tag):
             else:
                 markup_type = "HTML"
-            # This code adapted from warnings.py so that we get the same line
-            # of code as our warnings.warn() call gets, even if the answer is wrong
-            # (as it may be in a multithreading situation).
-            caller = None
-            try:
-                caller = sys._getframe(1)
-            except ValueError:
-                pass
-            if caller:
-                globals = caller.f_globals
-                line_number = caller.f_lineno
-            else:
-                globals = sys.__dict__
-                line_number= 1
-            filename = globals.get('__file__')
-            if filename:
-                fnl = filename.lower()
-                if fnl.endswith((".pyc", ".pyo")):
-                    filename = filename[:-1]
-            if filename:
-                # If there is no filename at all, the user is most likely in a REPL,
-                # and the warning is not necessary.
-                values = dict(
-                    filename=filename,
-                    line_number=line_number,
-                    parser=builder.NAME,
-                    markup_type=markup_type
-                )
-                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
+            caller = traceback.extract_stack()[0]
+            filename = caller[0]
+            line_number = caller[1]
+            warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+                filename=filename,
+                line_number=line_number,
+                parser=builder.NAME,
+                markup_type=markup_type))
         self.builder = builder
         self.is_xml = builder.is_xml
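
Note: the two sides of this hunk find the caller's file and line differently. The removed 4.6.3 code walks the live stack with sys._getframe(1) to blame the code that actually constructed the soup; the restored 4.6.0 code takes the bottom frame of traceback.extract_stack(), i.e. the program entry point. A standalone sketch of both approaches (helper names are hypothetical):

    import sys
    import traceback

    def direct_caller():
        # 4.6.3 approach: the frame one level up from here.
        frame = sys._getframe(1)
        return frame.f_globals.get('__file__'), frame.f_lineno

    def entry_point():
        # 4.6.0 approach: the outermost frame on the stack.
        filename, line_number = traceback.extract_stack()[0][:2]
        return filename, line_number
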
@@ -356,10 +302,9 @@ class BeautifulSoup(Tag):
         self.preserve_whitespace_tag_stack = []
         self.pushTag(self)
-    def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs):
+    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
         """Create a new tag associated with this soup."""
-        kwattrs.update(attrs)
-        return Tag(None, self.builder, name, namespace, nsprefix, kwattrs)
+        return Tag(None, self.builder, name, namespace, nsprefix, attrs)
     def new_string(self, s, subclass=NavigableString):
         """Create a new NavigableString associated with this soup."""

View file

@@ -93,7 +93,7 @@ class TreeBuilder(object):
     preserve_whitespace_tags = set()
     empty_element_tags = None # A tag will be considered an empty-element
                               # tag when and only when it has no contents.
     # A value for these tag/attribute combinations is a space- or
     # comma-separated list of CDATA, rather than a single CDATA.
     cdata_list_attributes = {}
@@ -125,7 +125,7 @@
         if self.empty_element_tags is None:
             return True
         return tag_name in self.empty_element_tags
     def feed(self, markup):
         raise NotImplementedError()
@@ -235,17 +235,11 @@ class HTMLTreeBuilder(TreeBuilder):
     empty_element_tags = set([
         # These are from HTML5.
         'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
-        # These are from earlier versions of HTML and are removed in HTML5.
-        'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer'
+        # These are from HTML4, removed in HTML5.
+        'spacer', 'frame'
         ])
-    # The HTML standard defines these as block-level elements. Beautiful
-    # Soup does not treat these elements differently from other elements,
-    # but it may do so eventually, and this information is available if
-    # you need to use it.
-    block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"])
     # The HTML standard defines these attributes as containing a
     # space-separated list of values, not a single value. That is,
     # class="foo bar" means that the 'class' attribute has two values,
View file

@@ -1,4 +1,3 @@
-# encoding: utf-8
 """Use the HTMLParser library to parse HTML files that aren't too bad."""
 # Use of this source code is governed by a BSD-style license that can be
@@ -65,18 +64,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
         # order. It's a list of closing tags we've already handled and
         # will ignore, assuming they ever show up.
         self.already_closed_empty_element = []
-    def error(self, msg):
-        """In Python 3, HTMLParser subclasses must implement error(), although this
-        requirement doesn't appear to be documented.
-        In Python 2, HTMLParser implements error() as raising an exception.
-        In any event, this method is called only on very strange markup and our best strategy
-        is to pretend it didn't happen and keep going.
-        """
-        warnings.warn(msg)
     def handle_startendtag(self, name, attrs):
         # This is only called when the markup looks like
         # <tag/>.
@@ -141,26 +129,11 @@ class BeautifulSoupHTMLParser(HTMLParser):
         else:
             real_name = int(name)
-        data = None
-        if real_name < 256:
-            # HTML numeric entities are supposed to reference Unicode
-            # code points, but sometimes they reference code points in
-            # some other encoding (ahem, Windows-1252). E.g. &#147;
-            # instead of &#201; for LEFT DOUBLE QUOTATION MARK. This
-            # code tries to detect this situation and compensate.
-            for encoding in (self.soup.original_encoding, 'windows-1252'):
-                if not encoding:
-                    continue
-                try:
-                    data = bytearray([real_name]).decode(encoding)
-                except UnicodeDecodeError, e:
-                    pass
-        if not data:
-            try:
-                data = unichr(real_name)
-            except (ValueError, OverflowError), e:
-                pass
-        data = data or u"\N{REPLACEMENT CHARACTER}"
+        try:
+            data = unichr(real_name)
+        except (ValueError, OverflowError), e:
+            data = u"\N{REPLACEMENT CHARACTER}"
         self.handle_data(data)
     def handle_entityref(self, name):
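
Note: the removed block is 4.6.3's compensation for numeric entities below 256 that really reference Windows-1252 code points (e.g. &#147; for a curly quote); the restored 4.6.0 code maps the code point directly and falls back to U+FFFD. A rough standalone sketch of the removed idea, in the same Python 2 style as the surrounding file:

    def decode_numeric_entity(code_point, original_encoding=None):
        data = None
        if code_point < 256:
            # Try the document's own encoding, then Windows-1252.
            for encoding in (original_encoding, 'windows-1252'):
                if not encoding:
                    continue
                try:
                    data = bytearray([code_point]).decode(encoding)
                except UnicodeDecodeError:
                    pass
        if not data:
            try:
                data = unichr(code_point)
            except (ValueError, OverflowError):
                pass
        return data or u"\N{REPLACEMENT CHARACTER}"
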
@@ -168,12 +141,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
         if character is not None:
             data = character
         else:
-            # If this were XML, it would be ambiguous whether "&foo"
-            # was an character entity reference with a missing
-            # semicolon or the literal string "&foo". Since this is
-            # HTML, we have a complete list of all character entity references,
-            # and this one wasn't found, so assume it's the literal string "&foo".
-            data = "&%s" % name
+            data = "&%s;" % name
         self.handle_data(data)
     def handle_comment(self, data):
@@ -245,7 +213,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
-            parser.close()
         except HTMLParseError, e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))

View file

@@ -5,13 +5,9 @@ __all__ = [
     'LXMLTreeBuilder',
     ]
-try:
-    from collections.abc import Callable # Python 3.6
-except ImportError , e:
-    from collections import Callable
 from io import BytesIO
 from StringIO import StringIO
+import collections
 from lxml import etree
 from bs4.element import (
     Comment,
@@ -62,7 +58,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
             # Use the default parser.
             parser = self.default_parser(encoding)
-        if isinstance(parser, Callable):
+        if isinstance(parser, collections.Callable):
             # Instantiate the parser with default arguments
             parser = parser(target=self, strip_cdata=False, encoding=encoding)
         return parser
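
Note: the removed try/except exists because Callable moved to collections.abc in Python 3 and the bare collections.Callable spelling is deprecated there; the restored 4.6.0 code pins the old spelling, which is fine for the Python 2 runtime this commit targets. The version-agnostic form, for reference:

    try:
        from collections.abc import Callable  # Python 3.3+
    except ImportError:
        from collections import Callable      # Python 2

    # The ABC accepts functions, lambdas, classes and callable instances.
    assert isinstance(len, Callable)
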
@@ -151,11 +147,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         attrs = dict(attrs)
         nsprefix = None
         # Invert each namespace map as it comes in.
-        if len(nsmap) == 0 and len(self.nsmaps) > 1:
-            # There are no new namespaces for this tag, but
-            # non-default namespaces are in play, so we need a
-            # separate tag stack to know when they end.
-            self.nsmaps.append(None)
+        if len(self.nsmaps) > 1:
+            # There are no new namespaces for this tag, but
+            # non-default namespaces are in play, so we need a
+            # separate tag stack to know when they end.
+            self.nsmaps.append(None)
         elif len(nsmap) > 0:
             # A new namespace mapping has come into play.
             inverted_nsmap = dict((value, key) for key, value in nsmap.items())

View file

@@ -46,9 +46,9 @@ except ImportError:
     pass
 xml_encoding_re = re.compile(
-    '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
+    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
 html_meta_re = re.compile(
-    '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
 class EntitySubstitution(object):
@@ -82,7 +82,7 @@ class EntitySubstitution(object):
     }
     BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
-                                           "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
+                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                            ")")
     AMPERSAND_OR_BRACKET = re.compile("([<>&])")
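
Note: this file's hunks (and several below in element.py and test_tree.py) revert raw-string or double-backslash regex literals to single-backslash plain strings. The compiled patterns are identical, since \s, \d and \? are not recognized string escapes and pass through unchanged; the raw-string spelling only avoids the invalid-escape warnings newer Python 3 releases emit. For example:

    import re

    # Both spellings produce the same two-character pattern source "\s+".
    assert re.compile(r"\s+").pattern == re.compile("\\s+").pattern == "\\s+"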

View file

@@ -37,7 +37,7 @@ def diagnose(data):
                   name)
     if 'lxml' in basic_parsers:
-        basic_parsers.append("lxml-xml")
+        basic_parsers.append(["lxml", "xml"])
         try:
             from lxml import etree
             print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
@@ -56,27 +56,21 @@ def diagnose(data):
     if hasattr(data, 'read'):
         data = data.read()
+    elif os.path.exists(data):
+        print '"%s" looks like a filename. Reading data from the file.' % data
+        with open(data) as fp:
+            data = fp.read()
     elif data.startswith("http:") or data.startswith("https:"):
         print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
         print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
         return
-    else:
-        try:
-            if os.path.exists(data):
-                print '"%s" looks like a filename. Reading data from the file.' % data
-                with open(data) as fp:
-                    data = fp.read()
-        except ValueError:
-            # This can happen on some platforms when the 'filename' is
-            # too long. Assume it's data and not a filename.
-            pass
     print
     print
     for parser in basic_parsers:
         print "Trying to parse your markup with %s" % parser
         success = False
         try:
-            soup = BeautifulSoup(data, features=parser)
+            soup = BeautifulSoup(data, parser)
             success = True
         except Exception, e:
             print "%s could not parse the markup." % parser

View file

@@ -2,10 +2,7 @@
 # found in the LICENSE file.
 __license__ = "MIT"
-try:
-    from collections.abc import Callable # Python 3.6
-except ImportError , e:
-    from collections import Callable
+import collections
 import re
 import shlex
 import sys
@@ -15,7 +12,7 @@ from bs4.dammit import EntitySubstitution
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)
-whitespace_re = re.compile(r"\s+")
+whitespace_re = re.compile("\s+")
 def _alias(attr):
     """Alias one attribute name to another for backward compatibility"""
@@ -72,7 +69,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
     The value of the 'content' attribute will be one of these objects.
     """
-    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
+    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
     def __new__(cls, original_value):
         match = cls.CHARSET_RE.search(original_value)
@@ -126,41 +123,6 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
         return cls._substitute_if_appropriate(
             ns, EntitySubstitution.substitute_xml)
-class Formatter(object):
-    """Contains information about how to format a parse tree."""
-    # By default, represent void elements as <tag/> rather than <tag>
-    void_element_close_prefix = '/'
-    def substitute_entities(self, *args, **kwargs):
-        """Transform certain characters into named entities."""
-        raise NotImplementedError()
-class HTMLFormatter(Formatter):
-    """The default HTML formatter."""
-    def substitute(self, *args, **kwargs):
-        return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
-class MinimalHTMLFormatter(Formatter):
-    """A minimal HTML formatter."""
-    def substitute(self, *args, **kwargs):
-        return HTMLAwareEntitySubstitution.substitute_xml(*args, **kwargs)
-class HTML5Formatter(HTMLFormatter):
-    """An HTML formatter that omits the slash in a void tag."""
-    void_element_close_prefix = None
-class XMLFormatter(Formatter):
-    """Substitute only the essential XML entities."""
-    def substitute(self, *args, **kwargs):
-        return EntitySubstitution.substitute_xml(*args, **kwargs)
-class HTMLXMLFormatter(Formatter):
-    """Format XML using HTML rules."""
-    def substitute(self, *args, **kwargs):
-        return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
 class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
@@ -169,49 +131,40 @@ class PageElement(object):
     # to methods like encode() and prettify():
     #
     # "html" - All Unicode characters with corresponding HTML entities
-    #   are converted to those entities on output.
-    # "html5" - The same as "html", but empty void tags are represented as
-    #   <tag> rather than <tag/>
-    # "minimal" - Bare ampersands and angle brackets are converted to
+    #   are converted to those entities on output.
+    # "minimal" - Bare ampersands and angle brackets are converted to
     #   XML entities: &amp; &lt; &gt;
     # None - The null formatter. Unicode characters are never
     #   converted to entities. This is not recommended, but it's
     #   faster than "minimal".
-    # A callable function - it will be called on every string that needs to undergo entity substitution.
-    # A Formatter instance - Formatter.substitute(string) will be called on every string that
+    # A function - This function will be called on every string that
     #   needs to undergo entity substitution.
     #
-    # In an HTML document, the default "html", "html5", and "minimal"
-    # functions will leave the contents of <script> and <style> tags
-    # alone. For an XML document, all tags will be given the same
-    # treatment.
+    # In an HTML document, the default "html" and "minimal" functions
+    # will leave the contents of <script> and <style> tags alone. For
+    # an XML document, all tags will be given the same treatment.
     HTML_FORMATTERS = {
-        "html" : HTMLFormatter(),
-        "html5" : HTML5Formatter(),
-        "minimal" : MinimalHTMLFormatter(),
+        "html" : HTMLAwareEntitySubstitution.substitute_html,
+        "minimal" : HTMLAwareEntitySubstitution.substitute_xml,
         None : None
         }
     XML_FORMATTERS = {
-        "html" : HTMLXMLFormatter(),
-        "minimal" : XMLFormatter(),
+        "html" : EntitySubstitution.substitute_html,
+        "minimal" : EntitySubstitution.substitute_xml,
         None : None
         }
     def format_string(self, s, formatter='minimal'):
         """Format the given string using the given formatter."""
-        if isinstance(formatter, basestring):
+        if not callable(formatter):
             formatter = self._formatter_for_name(formatter)
         if formatter is None:
             output = s
         else:
-            if callable(formatter):
-                # Backwards compatibility -- you used to pass in a formatting method.
-                output = formatter(s)
-            else:
-                output = formatter.substitute(s)
+            output = formatter(s)
         return output
     @property
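
Note: after this revert a formatter is a bare callable (or a name mapped to one) rather than a Formatter object, so custom output formatting is done by passing any callable, as the test suite further down does:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<b>&lt;hi&gt;</b>", "html.parser")
    # The callable receives each string and returns the text to emit;
    # normal entity substitution is skipped entirely.
    print(soup.decode(formatter=lambda s: s.upper()))  # <b><HI></b>
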
@@ -241,9 +194,11 @@ class PageElement(object):
     def _formatter_for_name(self, name):
         "Look up a formatter function based on its name and the tree."
         if self._is_xml:
-            return self.XML_FORMATTERS.get(name, XMLFormatter())
+            return self.XML_FORMATTERS.get(
+                name, EntitySubstitution.substitute_xml)
         else:
-            return self.HTML_FORMATTERS.get(name, HTMLFormatter())
+            return self.HTML_FORMATTERS.get(
+                name, HTMLAwareEntitySubstitution.substitute_xml)
     def setup(self, parent=None, previous_element=None, next_element=None,
               previous_sibling=None, next_sibling=None):
@@ -361,14 +316,6 @@ class PageElement(object):
                 and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
-        from bs4 import BeautifulSoup
-        if isinstance(new_child, BeautifulSoup):
-            # We don't want to end up with a situation where one BeautifulSoup
-            # object contains another. Insert the children one at a time.
-            for subchild in list(new_child.contents):
-                self.insert(position, subchild)
-                position += 1
-            return
         position = min(position, len(self.contents))
         if hasattr(new_child, 'parent') and new_child.parent is not None:
             # We're 'inserting' an element that's already one
@@ -589,21 +536,14 @@ class PageElement(object):
         elif isinstance(name, basestring):
             # Optimization to find all tags with a given name.
             if name.count(':') == 1:
-                # This is a name with a prefix. If this is a namespace-aware document,
-                # we need to match the local name against tag.name. If not,
-                # we need to match the fully-qualified name against tag.name.
-                prefix, local_name = name.split(':', 1)
+                # This is a name with a prefix.
+                prefix, name = name.split(':', 1)
             else:
                 prefix = None
-                local_name = name
             result = (element for element in generator
                       if isinstance(element, Tag)
-                      and (
-                          element.name == name
-                      ) or (
-                          element.name == local_name
-                          and (prefix is None or element.prefix == prefix)
-                      )
+                      and element.name == name
+                      and (prefix is None or element.prefix == prefix)
                       )
             return ResultSet(strainer, result)
         results = ResultSet(strainer)
@@ -922,7 +862,7 @@ class Tag(PageElement):
             self.can_be_empty_element = builder.can_be_empty_element(name)
         else:
             self.can_be_empty_element = False
     parserClass = _alias("parser_class") # BS3
     def __copy__(self):
@@ -1106,10 +1046,8 @@ class Tag(PageElement):
             # BS3: soup.aTag -> "soup.find("a")
             tag_name = tag[:-3]
             warnings.warn(
-                '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
-                    name=tag_name
-                )
-            )
+                '.%sTag is deprecated, use .find("%s") instead.' % (
+                    tag_name, tag_name))
             return self.find(tag_name)
         # We special case contents to avoid recursion.
         elif not tag.startswith("__") and not tag == "contents":
@@ -1191,10 +1129,11 @@ class Tag(PageElement):
           encoding.
        """
-        # First off, turn a string formatter into a Formatter object. This
+        # First off, turn a string formatter into a function. This
         # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not callable(formatter):
             formatter = self._formatter_for_name(formatter)
         attrs = []
         if self.attrs:
             for key, val in sorted(self.attrs.items()):
@@ -1223,9 +1162,7 @@ class Tag(PageElement):
                 prefix = self.prefix + ":"
         if self.is_empty_element:
-            close = ''
-            if isinstance(formatter, Formatter):
-                close = formatter.void_element_close_prefix or close
+            close = '/'
         else:
             closeTag = '</%s%s>' % (prefix, self.name)
@@ -1296,9 +1233,9 @@ class Tag(PageElement):
        :param formatter: The output formatter responsible for converting
           entities to Unicode characters.
        """
-        # First off, turn a string formatter into a Formatter object. This
+        # First off, turn a string formatter into a function. This
         # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not callable(formatter):
             formatter = self._formatter_for_name(formatter)
         pretty_print = (indent_level is not None)
@@ -1411,29 +1348,15 @@ class Tag(PageElement):
         # Handle grouping selectors if ',' exists, ie: p,a
         if ',' in selector:
             context = []
-            selectors = [x.strip() for x in selector.split(",")]
-            # If a selector is mentioned multiple times we don't want
-            # to use it more than once.
-            used_selectors = set()
-            # We also don't want to select the same element more than once,
-            # if it's matched by multiple selectors.
-            selected_object_ids = set()
-            for partial_selector in selectors:
+            for partial_selector in selector.split(','):
+                partial_selector = partial_selector.strip()
                 if partial_selector == '':
                     raise ValueError('Invalid group selection syntax: %s' % selector)
-                if partial_selector in used_selectors:
-                    continue
-                used_selectors.add(partial_selector)
                 candidates = self.select(partial_selector, limit=limit)
                 for candidate in candidates:
-                    # This lets us distinguish between distinct tags that
-                    # represent the same markup.
-                    object_id = id(candidate)
-                    if object_id not in selected_object_ids:
+                    if candidate not in context:
                         context.append(candidate)
-                        selected_object_ids.add(object_id)
                 if limit and len(context) >= limit:
                     break
             return context
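
Note: the removed 4.6.3 code deduplicates grouped-selector results by id(), so two distinct tags with identical markup are both kept; the restored 4.6.0 code uses `candidate not in context`, which compares tags by equality and collapses them (the behavior the deleted test_select_duplicate_elements case at the bottom of this commit pinned down). Quick illustration against the bundled 4.6.0:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div class="c1"/><div class="c2"/><div class="c1"/>',
                         'html.parser')
    # Equality-based dedup collapses the two identical .c1 divs: 2 results
    # here, where the removed 4.6.3 code returned all 3.
    print(len(soup.select(".c1, .c2")))
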
@@ -1495,7 +1418,7 @@ class Tag(PageElement):
             if tag_name == '':
                 raise ValueError(
                     "A pseudo-class must be prefixed with a tag name.")
-            pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+            pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
             found = []
             if pseudo_attributes is None:
                 pseudo_type = pseudo
@@ -1729,7 +1652,7 @@ class SoupStrainer(object):
             markup = markup_name
             markup_attrs = markup
         call_function_with_tag_data = (
-            isinstance(self.name, Callable)
+            isinstance(self.name, collections.Callable)
             and not isinstance(markup_name, Tag))
         if ((not self.name)
@@ -1809,7 +1732,7 @@ class SoupStrainer(object):
             # True matches any non-None value.
             return markup is not None
-        if isinstance(match_against, Callable):
+        if isinstance(match_against, collections.Callable):
             return match_against(markup)
         # Custom callables take the tag as an argument, but all

View file

@@ -1,4 +1,3 @@
-# encoding: utf-8
 """Helper classes for tests."""
 # Use of this source code is governed by a BSD-style license that can be
@@ -151,14 +150,6 @@ class HTMLTreeBuilderSmokeTest(object):
             soup.encode("utf-8").replace(b"\n", b""),
             markup.replace(b"\n", b""))
-    def test_namespaced_html(self):
-        """When a namespaced XML document is parsed as HTML it should
-        be treated as HTML with weird tag names.
-        """
-        markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
-        soup = self.soup(markup)
-        self.assertEqual(2, len(soup.find_all("ns1:foo")))
     def test_processing_instruction(self):
         # We test both Unicode and bytestring to verify that
         # process_markup correctly sets processing_instruction_class
@@ -320,26 +311,6 @@ Hello, world!
     def test_angle_brackets_in_attribute_values_are_escaped(self):
         self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
-    def test_strings_resembling_character_entity_references(self):
-        # "&T" and "&p" look like incomplete character entities, but they are
-        # not.
-        self.assertSoupEquals(
-            u"<p>&bull; AT&T is in the s&p 500</p>",
-            u"<p>\u2022 AT&amp;T is in the s&amp;p 500</p>"
-        )
-    def test_entities_in_foreign_document_encoding(self):
-        # &#147; and &#148; are invalid numeric entities referencing
-        # Windows-1252 characters. &#45; references a character common
-        # to Windows-1252 and Unicode, and &#9731; references a
-        # character only found in Unicode.
-        #
-        # All of these entities should be converted to Unicode
-        # characters.
-        markup = "<p>&#147;Hello&#148; &#45;&#9731;</p>"
-        soup = self.soup(markup)
-        self.assertEquals(u"“Hello” -☃", soup.p.string)
     def test_entities_in_attributes_converted_to_unicode(self):
         expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
         self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
@@ -363,7 +334,7 @@ Hello, world!
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
     def test_multipart_strings(self):
         "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
@@ -653,17 +624,6 @@ class XMLTreeBuilderSmokeTest(object):
         self.assertEqual(
             soup.encode("utf-8"), markup)
-    def test_nested_namespaces(self):
-        doc = b"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<parent xmlns="http://ns1/">
- <child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
-  <grandchild ns3:attr="value" xmlns="http://ns4/"/>
- </child>
-</parent>"""
-        soup = self.soup(doc)
-        self.assertEqual(doc, soup.encode())
     def test_formatter_processes_script_tag_for_xml_documents(self):
         doc = """
   <script type="text/javascript">

View file

@@ -5,7 +5,6 @@ from pdb import set_trace
 import pickle
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
 from bs4.builder import HTMLParserTreeBuilder
-from bs4.builder._htmlparser import BeautifulSoupHTMLParser
 class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
@@ -33,17 +32,3 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
     def test_redundant_empty_element_closing_tags(self):
         self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
         self.assertSoupEquals('</br></br></br>', "")
-    def test_empty_element(self):
-        # This verifies that any buffered data present when the parser
-        # finishes working is handled.
-        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
-class TestHTMLParserSubclass(SoupTest):
-    def test_error(self):
-        """Verify that our HTMLParser subclass implements error() in a way
-        that doesn't cause a crash.
-        """
-        parser = BeautifulSoupHTMLParser()
-        parser.error("don't crash")

View file

@@ -46,12 +46,6 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         self.assertSoupEquals(
             "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
-    def test_entities_in_foreign_document_encoding(self):
-        # We can't implement this case correctly because by the time we
-        # hear about markup like "&#147;", it's been (incorrectly) converted into
-        # a string like u'\x93'
-        pass
     # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
     # test if an old version of lxml is installed.

View file

@@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
                   </html>'''
         # All that whitespace looks good but makes the tests more
         # difficult. Get rid of it.
-        markup = re.compile(r"\n\s*").sub("", markup)
+        markup = re.compile("\n\s*").sub("", markup)
         self.tree = self.soup(markup)
@@ -703,12 +703,12 @@ class TestTagCreation(SoupTest):
     """Test the ability to create new tags."""
     def test_new_tag(self):
         soup = self.soup("")
-        new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"})
+        new_tag = soup.new_tag("foo", bar="baz")
         self.assertTrue(isinstance(new_tag, Tag))
         self.assertEqual("foo", new_tag.name)
-        self.assertEqual(dict(bar="baz", name="a name"), new_tag.attrs)
+        self.assertEqual(dict(bar="baz"), new_tag.attrs)
         self.assertEqual(None, new_tag.parent)
     def test_tag_inherits_self_closing_rules_from_builder(self):
         if XML_BUILDER_PRESENT:
             xml_soup = BeautifulSoup("", "lxml-xml")
@@ -821,26 +821,6 @@ class TestTreeModification(SoupTest):
         soup = self.soup(text)
         self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
-    def test_insert_beautifulsoup_object_inserts_children(self):
-        """Inserting one BeautifulSoup object into another actually inserts all
-        of its children -- you'll never combine BeautifulSoup objects.
-        """
-        soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
-        text = "<p>p2</p><p>p3</p>"
-        to_insert = self.soup(text)
-        soup.insert(1, to_insert)
-        for i in soup.descendants:
-            assert not isinstance(i, BeautifulSoup)
-        p1, p2, p3, p4 = list(soup.children)
-        self.assertEquals("And now, a word:", p1.string)
-        self.assertEquals("p2", p2.string)
-        self.assertEquals("p3", p3.string)
-        self.assertEquals("And we're back.", p4.string)
     def test_replace_with_maintains_next_element_throughout(self):
         soup = self.soup('<p><a>one</a><b>three</b></p>')
         a = soup.a
@@ -1206,7 +1186,7 @@ class TestElementObjects(SoupTest):
             tag = soup.bTag
         self.assertEqual(soup.b, tag)
         self.assertEqual(
-            '.bTag is deprecated, use .find("b") instead. If you really were looking for a tag called bTag, use .find("bTag")',
+            '.bTag is deprecated, use .find("b") instead.',
             str(w[0].message))
     def test_has_attr(self):
def test_has_attr(self):
@@ -1439,21 +1419,13 @@ class TestSubstitutions(SoupTest):
                 u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
     def test_formatter_html(self):
-        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html")
         self.assertEqual(
             decoded,
-            self.document_for("<br/><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
-    def test_formatter_html5(self):
-        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter="html5")
-        self.assertEqual(
-            decoded,
-            self.document_for("<br><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
     def test_formatter_minimal(self):
         markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
@@ -1474,14 +1446,14 @@ class TestSubstitutions(SoupTest):
             self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
     def test_formatter_custom(self):
-        markup = u"<b>&lt;foo&gt;</b><b>bar</b><br/>"
+        markup = u"<b>&lt;foo&gt;</b><b>bar</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter = lambda x: x.upper())
         # Instead of normal entity conversion code, the custom
         # callable is called on every string.
         self.assertEqual(
             decoded,
-            self.document_for(u"<b><FOO></b><b>BAR</b><br>"))
+            self.document_for(u"<b><FOO></b><b>BAR</b>"))
     def test_formatter_is_run_on_attribute_values(self):
         markup = u'<a href="http://a.com?a=b&c=é">e</a>'
@@ -1526,7 +1498,7 @@ class TestSubstitutions(SoupTest):
             u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
             soup.div.prettify())
-    def test_prettify_accepts_formatter_function(self):
+    def test_prettify_accepts_formatter(self):
         soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
         pretty = soup.prettify(formatter = lambda x: x.upper())
         self.assertTrue("FOO" in pretty)
@@ -2074,17 +2046,5 @@ class TestSoupSelector(TreeTest):
     def test_multiple_select_nested(self):
         self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
-    def test_select_duplicate_elements(self):
-        # When markup contains duplicate elements, a multiple select
-        # will find all of them.
-        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
-        soup = BeautifulSoup(markup, 'html.parser')
-        selected = soup.select(".c1, .c2")
-        self.assertEquals(3, len(selected))
-        # Verify that find_all finds the same elements, though because
-        # of an implementation detail it finds them in a different
-        # order.
-        for element in soup.find_all(class_=['c1', 'c2']):
-            assert element in selected

View file

@@ -1,6 +1,21 @@
 # -*- coding: utf-8 -*-
 from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor
+import platform
+is_windows_special_path = False
+if platform.system() == "Windows":
+    try:
+        __file__.decode("ascii")
+    except UnicodeDecodeError:
+        is_windows_special_path = True
+if not is_windows_special_path:
+    from concurrent.futures import ThreadPoolExecutor
+else:
+    ThreadPoolExecutor = object
 from datetime import datetime
 import io
 import itertools
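
Note: the added block is a Bazarr-specific guard, not upstream subliminal. On a Python 2 Windows install whose path contains non-ASCII characters, __file__.decode("ascii") raises UnicodeDecodeError; in that situation importing ThreadPoolExecutor (the concurrent.futures backport) presumably fails, so a bare `object` placeholder is bound instead and code paths that would spawn a pool must be avoided. Condensed sketch of the same logic (the helper name is hypothetical):

    import platform

    def resolve_executor():
        if platform.system() == "Windows":
            try:
                __file__.decode("ascii")  # Python 2: str is bytes
            except UnicodeDecodeError:
                return object  # placeholder; a real pool cannot be imported
        from concurrent.futures import ThreadPoolExecutor
        return ThreadPoolExecutor
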
@@ -388,7 +403,7 @@ def search_external_subtitles(path, directory=None):
     subtitles = {}
     for p in os.listdir(directory or dirpath):
         # keep only valid subtitle filenames
-        if not p.startswith(fileroot) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
+        if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
            continue
        # extract the potential language code
@@ -420,7 +435,7 @@ def scan_video(path):
         raise ValueError('Path does not exist')
     # check video extension
-    if not path.lower().endswith(VIDEO_EXTENSIONS):
+    if not path.endswith(VIDEO_EXTENSIONS):
         raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
     dirpath, filename = os.path.split(path)
@@ -468,7 +483,7 @@ def scan_archive(path):
     rar = RarFile(path)
     # filter on video extensions
-    rar_filenames = [f for f in rar.namelist() if f.lower().endswith(VIDEO_EXTENSIONS)]
+    rar_filenames = [f for f in rar.namelist() if f.endswith(VIDEO_EXTENSIONS)]
     # no video found
     if not rar_filenames:
@@ -521,26 +536,17 @@ def scan_videos(path, age=None, archives=True):
             if dirname.startswith('.'):
                 logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
                 dirnames.remove(dirname)
-            # Skip Sample folder
-            if dirname.lower() == 'sample':
-                logger.debug('Skipping sample dirname %r in %r', dirname, dirpath)
-                dirnames.remove(dirname)
         # scan for videos
         for filename in filenames:
             # filter on videos and archives
-            if not (filename.lower().endswith(VIDEO_EXTENSIONS) or
-                    archives and filename.lower().endswith(ARCHIVE_EXTENSIONS)):
+            if not (filename.endswith(VIDEO_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
                 continue
             # skip hidden files
             if filename.startswith('.'):
                 logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
                 continue
-            # skip 'sample' media files
-            if os.path.splitext(filename)[0].lower() == 'sample':
-                logger.debug('Skipping sample filename %r in %r', filename, dirpath)
-                continue
             # reconstruct the file path
             filepath = os.path.join(dirpath, filename)
@@ -562,13 +568,13 @@ def scan_videos(path, age=None, archives=True):
                 continue
             # scan
-            if filename.lower().endswith(VIDEO_EXTENSIONS): # video
+            if filename.endswith(VIDEO_EXTENSIONS): # video
                 try:
                     video = scan_video(filepath)
                 except ValueError: # pragma: no cover
                     logger.exception('Error scanning video')
                     continue
-            elif archives and filename.lower().endswith(ARCHIVE_EXTENSIONS): # archive
+            elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive
                 try:
                     video = scan_archive(filepath)
                 except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
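
Note: the hunks in this file revert filename.lower().endswith(...) to plain filename.endswith(...), making extension matching case-sensitive again ('Movie.MKV' is skipped, 'movie.mkv' is scanned). str.endswith accepts a tuple of suffixes, which is how VIDEO_EXTENSIONS is used throughout. Self-contained sketch (the extension list is abbreviated for illustration):

    VIDEO_EXTENSIONS = ('.avi', '.mkv', '.mp4')

    def is_video(filename, ignore_case=True):
        # endswith() takes a tuple; lower() makes the match case-insensitive.
        name = filename.lower() if ignore_case else filename
        return name.endswith(VIDEO_EXTENSIONS)

    assert is_video('Movie.MKV')
    assert not is_video('Movie.MKV', ignore_case=False)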

View file

@@ -179,7 +179,7 @@ class Addic7edProvider(Provider):
         # make the search
         logger.info('Searching show ids with %r', params)
-        r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
+        r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
         r.raise_for_status()
         soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

View file

@@ -170,7 +170,7 @@ class LegendasTVProvider(Provider):
         # Provider needs UNRAR installed. If not available raise ConfigurationError
         try:
-            rarfile.custom_check([rarfile.UNRAR_TOOL], True)
+            rarfile.custom_check(rarfile.UNRAR_TOOL)
         except rarfile.RarExecError:
             raise ConfigurationError('UNRAR tool not available')

View file

@@ -3,7 +3,7 @@ from datetime import datetime, timedelta
 from functools import wraps
 import logging
 import re
+import _strptime
 import requests
 from .. import __short_version__
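
Note: importing _strptime eagerly is a known workaround for a CPython quirk: datetime.strptime() imports the _strptime module lazily on first use, and concurrent first calls from several threads can fail with AttributeError or ImportError. Sketch of the failure mode the import forestalls (the threading scenario is illustrative):

    import _strptime  # force the lazy import before any threads exist
    import threading
    from datetime import datetime

    def parse(ts):
        return datetime.strptime(ts, "%Y-%m-%d")

    threads = [threading.Thread(target=parse, args=("2018-10-20",))
               for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()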

View file

@@ -6,8 +6,6 @@ import os
 import chardet
 import pysrt
-import types
 from .score import get_equivalent_release_groups
 from .video import Episode, Movie
 from .utils import sanitize, sanitize_release_group
@@ -232,39 +230,16 @@ def guess_matches(video, guess, partial=False):
     if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
         matches.add('title')
     # release_group
-    if 'release_group' in guess:
-        release_groups = guess["release_group"]
-        if not isinstance(release_groups, types.ListType):
-            release_groups = [release_groups]
-        if video.release_group:
-            for release_group in release_groups:
-                if (sanitize_release_group(release_group) in
-                        get_equivalent_release_groups(sanitize_release_group(video.release_group))):
-                    matches.add('release_group')
-                    break
+    if (video.release_group and 'release_group' in guess and
+            sanitize_release_group(guess['release_group']) in
+            get_equivalent_release_groups(sanitize_release_group(video.release_group))):
+        matches.add('release_group')
     # resolution
     if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
         matches.add('resolution')
     # format
-    if 'format' in guess:
-        formats = guess["format"]
-        if not isinstance(formats, types.ListType):
-            formats = [formats]
-        if video.format:
-            video_format = video.format
-            if video_format in ("HDTV", "SDTV", "TV"):
-                video_format = "TV"
-                logger.debug("Treating HDTV/SDTV the same")
-            for frmt in formats:
-                if frmt in ("HDTV", "SDTV"):
-                    frmt = "TV"
-                if frmt.lower() == video_format.lower():
-                    matches.add('format')
-                    break
+    if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
+        matches.add('format')
     # video_codec
     if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
         matches.add('video_codec')