mirror of
https://github.com/morpheus65535/bazarr.git
synced 2024-09-20 07:25:58 +08:00
Initial commit.
This commit is contained in:
parent
b9a987b57d
commit
d61bdfcd4f
|
@ -3,6 +3,7 @@ from get_argv import config_dir
|
|||
import sqlite3
|
||||
import os
|
||||
from subliminal import provider_manager
|
||||
from subliminal_patch import provider_manager
|
||||
import collections
|
||||
|
||||
def load_providers():
|
||||
|
|
|
@ -10,7 +10,9 @@ import time
|
|||
from datetime import datetime, timedelta
|
||||
from babelfish import Language
|
||||
from subliminal import region, scan_video, Video, download_best_subtitles, compute_score, save_subtitles, AsyncProviderPool, score, list_subtitles, download_subtitles
|
||||
from subliminal_patch import region, scan_video, Video, download_best_subtitles, compute_score, save_subtitles, AsyncProviderPool, score, list_subtitles, download_subtitles
|
||||
from subliminal.subtitle import get_subtitle_path
|
||||
from subliminal_patch.subtitle import get_subtitle_path
|
||||
from get_languages import language_from_alpha3, alpha2_from_alpha3, alpha3_from_alpha2
|
||||
from bs4 import UnicodeDammit
|
||||
from get_settings import get_general_settings, pp_replace, path_replace, path_replace_movie, path_replace_reverse, path_replace_reverse_movie
|
||||
|
|
|
@ -6,6 +6,7 @@ import enzyme
|
|||
import babelfish
|
||||
import logging
|
||||
from subliminal import core
|
||||
from subliminal_patch import core
|
||||
import sqlite3
|
||||
import ast
|
||||
import langdetect
|
||||
|
|
|
@ -21,15 +21,14 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
|||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.6.3"
|
||||
__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
|
@ -83,46 +82,14 @@ class BeautifulSoup(Tag):
|
|||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
**kwargs):
|
||||
"""Constructor.
|
||||
|
||||
:param markup: A string or a file-like object representing
|
||||
markup to be parsed.
|
||||
|
||||
:param features: Desirable features of the parser to be used. This
|
||||
may be the name of a specific parser ("lxml", "lxml-xml",
|
||||
"html.parser", or "html5lib") or it may be the type of markup
|
||||
to be used ("html", "html5", "xml"). It's recommended that you
|
||||
name a specific parser, so that Beautiful Soup gives you the
|
||||
same results across platforms and virtual environments.
|
||||
|
||||
:param builder: A specific TreeBuilder to use instead of looking one
|
||||
up based on `features`. You shouldn't need to use this.
|
||||
|
||||
:param parse_only: A SoupStrainer. Only parts of the document
|
||||
matching the SoupStrainer will be considered. This is useful
|
||||
when parsing part of a document that would otherwise be too
|
||||
large to fit into memory.
|
||||
|
||||
:param from_encoding: A string indicating the encoding of the
|
||||
document to be parsed. Pass this in if Beautiful Soup is
|
||||
guessing wrongly about the document's encoding.
|
||||
|
||||
:param exclude_encodings: A list of strings indicating
|
||||
encodings known to be wrong. Pass this in if you don't know
|
||||
the document's encoding but you know Beautiful Soup's guess is
|
||||
wrong.
|
||||
|
||||
:param kwargs: For backwards compatibility purposes, the
|
||||
constructor accepts certain keyword arguments used in
|
||||
Beautiful Soup 3. None of these arguments do anything in
|
||||
Beautiful Soup 4 and there's no need to actually pass keyword
|
||||
arguments into the constructor.
|
||||
"""
|
||||
"""The Soup object is initialized as the 'root tag', and the
|
||||
provided markup (which can be a string or a file-like object)
|
||||
is fed into the underlying parser."""
|
||||
|
||||
if 'convertEntities' in kwargs:
|
||||
warnings.warn(
|
||||
|
@ -204,35 +171,14 @@ class BeautifulSoup(Tag):
|
|||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
# This code adapted from warnings.py so that we get the same line
|
||||
# of code as our warnings.warn() call gets, even if the answer is wrong
|
||||
# (as it may be in a multithreading situation).
|
||||
caller = None
|
||||
try:
|
||||
caller = sys._getframe(1)
|
||||
except ValueError:
|
||||
pass
|
||||
if caller:
|
||||
globals = caller.f_globals
|
||||
line_number = caller.f_lineno
|
||||
else:
|
||||
globals = sys.__dict__
|
||||
line_number= 1
|
||||
filename = globals.get('__file__')
|
||||
if filename:
|
||||
fnl = filename.lower()
|
||||
if fnl.endswith((".pyc", ".pyo")):
|
||||
filename = filename[:-1]
|
||||
if filename:
|
||||
# If there is no filename at all, the user is most likely in a REPL,
|
||||
# and the warning is not necessary.
|
||||
values = dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type
|
||||
)
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
|
@ -356,10 +302,9 @@ class BeautifulSoup(Tag):
|
|||
self.preserve_whitespace_tag_stack = []
|
||||
self.pushTag(self)
|
||||
|
||||
def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs):
|
||||
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
|
||||
"""Create a new tag associated with this soup."""
|
||||
kwattrs.update(attrs)
|
||||
return Tag(None, self.builder, name, namespace, nsprefix, kwattrs)
|
||||
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
|
||||
|
||||
def new_string(self, s, subclass=NavigableString):
|
||||
"""Create a new NavigableString associated with this soup."""
|
||||
|
|
|
@ -93,7 +93,7 @@ class TreeBuilder(object):
|
|||
preserve_whitespace_tags = set()
|
||||
empty_element_tags = None # A tag will be considered an empty-element
|
||||
# tag when and only when it has no contents.
|
||||
|
||||
|
||||
# A value for these tag/attribute combinations is a space- or
|
||||
# comma-separated list of CDATA, rather than a single CDATA.
|
||||
cdata_list_attributes = {}
|
||||
|
@ -125,7 +125,7 @@ class TreeBuilder(object):
|
|||
if self.empty_element_tags is None:
|
||||
return True
|
||||
return tag_name in self.empty_element_tags
|
||||
|
||||
|
||||
def feed(self, markup):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
@ -235,17 +235,11 @@ class HTMLTreeBuilder(TreeBuilder):
|
|||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from earlier versions of HTML and are removed in HTML5.
|
||||
'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer'
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these as block-level elements. Beautiful
|
||||
# Soup does not treat these elements differently from other elements,
|
||||
# but it may do so eventually, and this information is available if
|
||||
# you need to use it.
|
||||
block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
# class="foo bar" means that the 'class' attribute has two values,
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
# encoding: utf-8
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
|
@ -65,18 +64,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
|||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def error(self, msg):
|
||||
"""In Python 3, HTMLParser subclasses must implement error(), although this
|
||||
requirement doesn't appear to be documented.
|
||||
|
||||
In Python 2, HTMLParser implements error() as raising an exception.
|
||||
|
||||
In any event, this method is called only on very strange markup and our best strategy
|
||||
is to pretend it didn't happen and keep going.
|
||||
"""
|
||||
warnings.warn(msg)
|
||||
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
@ -141,26 +129,11 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
|||
else:
|
||||
real_name = int(name)
|
||||
|
||||
data = None
|
||||
if real_name < 256:
|
||||
# HTML numeric entities are supposed to reference Unicode
|
||||
# code points, but sometimes they reference code points in
|
||||
# some other encoding (ahem, Windows-1252). E.g. &#147;
|
||||
# instead of &#x201C; for LEFT DOUBLE QUOTATION MARK. This
|
||||
# code tries to detect this situation and compensate.
|
||||
for encoding in (self.soup.original_encoding, 'windows-1252'):
|
||||
if not encoding:
|
||||
continue
|
||||
try:
|
||||
data = bytearray([real_name]).decode(encoding)
|
||||
except UnicodeDecodeError, e:
|
||||
pass
|
||||
if not data:
|
||||
try:
|
||||
data = unichr(real_name)
|
||||
except (ValueError, OverflowError), e:
|
||||
pass
|
||||
data = data or u"\N{REPLACEMENT CHARACTER}"
|
||||
try:
|
||||
data = unichr(real_name)
|
||||
except (ValueError, OverflowError), e:
|
||||
data = u"\N{REPLACEMENT CHARACTER}"
|
||||
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
|
@ -168,12 +141,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
|
|||
if character is not None:
|
||||
data = character
|
||||
else:
|
||||
# If this were XML, it would be ambiguous whether "&foo"
|
||||
# was a character entity reference with a missing
|
||||
# semicolon or the literal string "&foo". Since this is
|
||||
# HTML, we have a complete list of all character entity references,
|
||||
# and this one wasn't found, so assume it's the literal string "&foo".
|
||||
data = "&%s" % name
|
||||
data = "&%s;" % name
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_comment(self, data):
|
||||
|
@ -245,7 +213,6 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
|||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
parser.close()
|
||||
except HTMLParseError, e:
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
|
|
|
@ -5,13 +5,9 @@ __all__ = [
|
|||
'LXMLTreeBuilder',
|
||||
]
|
||||
|
||||
try:
|
||||
from collections.abc import Callable # Python 3.6
|
||||
except ImportError , e:
|
||||
from collections import Callable
|
||||
|
||||
from io import BytesIO
|
||||
from StringIO import StringIO
|
||||
import collections
|
||||
from lxml import etree
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
|
@ -62,7 +58,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
|||
# Use the default parser.
|
||||
parser = self.default_parser(encoding)
|
||||
|
||||
if isinstance(parser, Callable):
|
||||
if isinstance(parser, collections.Callable):
|
||||
# Instantiate the parser with default arguments
|
||||
parser = parser(target=self, strip_cdata=False, encoding=encoding)
|
||||
return parser
|
||||
|
@ -151,11 +147,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
|||
attrs = dict(attrs)
|
||||
nsprefix = None
|
||||
# Invert each namespace map as it comes in.
|
||||
if len(nsmap) == 0 and len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
if len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
elif len(nsmap) > 0:
|
||||
# A new namespace mapping has come into play.
|
||||
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
|
||||
|
|
|
@ -46,9 +46,9 @@ except ImportError:
|
|||
pass
|
||||
|
||||
xml_encoding_re = re.compile(
|
||||
'^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
|
||||
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
|
||||
html_meta_re = re.compile(
|
||||
'<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
|
||||
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
|
||||
|
||||
class EntitySubstitution(object):
|
||||
|
||||
|
@ -82,7 +82,7 @@ class EntitySubstitution(object):
|
|||
}
|
||||
|
||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||
"&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
|
||||
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
|
||||
")")
|
||||
|
||||
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
|
||||
|
|
|
@ -37,7 +37,7 @@ def diagnose(data):
|
|||
name)
|
||||
|
||||
if 'lxml' in basic_parsers:
|
||||
basic_parsers.append("lxml-xml")
|
||||
basic_parsers.append(["lxml", "xml"])
|
||||
try:
|
||||
from lxml import etree
|
||||
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
|
||||
|
@ -56,27 +56,21 @@ def diagnose(data):
|
|||
|
||||
if hasattr(data, 'read'):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
return
|
||||
else:
|
||||
try:
|
||||
if os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
except ValueError:
|
||||
# This can happen on some platforms when the 'filename' is
|
||||
# too long. Assume it's data and not a filename.
|
||||
pass
|
||||
print
|
||||
print
|
||||
|
||||
for parser in basic_parsers:
|
||||
print "Trying to parse your markup with %s" % parser
|
||||
success = False
|
||||
try:
|
||||
soup = BeautifulSoup(data, features=parser)
|
||||
soup = BeautifulSoup(data, parser)
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
|
|
|
@ -2,10 +2,7 @@
|
|||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
try:
|
||||
from collections.abc import Callable # Python 3.6
|
||||
except ImportError , e:
|
||||
from collections import Callable
|
||||
import collections
|
||||
import re
|
||||
import shlex
|
||||
import sys
|
||||
|
@ -15,7 +12,7 @@ from bs4.dammit import EntitySubstitution
|
|||
DEFAULT_OUTPUT_ENCODING = "utf-8"
|
||||
PY3K = (sys.version_info[0] > 2)
|
||||
|
||||
whitespace_re = re.compile(r"\s+")
|
||||
whitespace_re = re.compile("\s+")
|
||||
|
||||
def _alias(attr):
|
||||
"""Alias one attribute name to another for backward compatibility"""
|
||||
|
@ -72,7 +69,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
|
|||
The value of the 'content' attribute will be one of these objects.
|
||||
"""
|
||||
|
||||
CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
|
||||
CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
|
||||
|
||||
def __new__(cls, original_value):
|
||||
match = cls.CHARSET_RE.search(original_value)
|
||||
|
@ -126,41 +123,6 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
|
|||
return cls._substitute_if_appropriate(
|
||||
ns, EntitySubstitution.substitute_xml)
|
||||
|
||||
class Formatter(object):
|
||||
"""Contains information about how to format a parse tree."""
|
||||
|
||||
# By default, represent void elements as <tag/> rather than <tag>
|
||||
void_element_close_prefix = '/'
|
||||
|
||||
def substitute_entities(self, *args, **kwargs):
|
||||
"""Transform certain characters into named entities."""
|
||||
raise NotImplementedError()
|
||||
|
||||
class HTMLFormatter(Formatter):
|
||||
"""The default HTML formatter."""
|
||||
def substitute(self, *args, **kwargs):
|
||||
return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
|
||||
|
||||
class MinimalHTMLFormatter(Formatter):
|
||||
"""A minimal HTML formatter."""
|
||||
def substitute(self, *args, **kwargs):
|
||||
return HTMLAwareEntitySubstitution.substitute_xml(*args, **kwargs)
|
||||
|
||||
class HTML5Formatter(HTMLFormatter):
|
||||
"""An HTML formatter that omits the slash in a void tag."""
|
||||
void_element_close_prefix = None
|
||||
|
||||
class XMLFormatter(Formatter):
|
||||
"""Substitute only the essential XML entities."""
|
||||
def substitute(self, *args, **kwargs):
|
||||
return EntitySubstitution.substitute_xml(*args, **kwargs)
|
||||
|
||||
class HTMLXMLFormatter(Formatter):
|
||||
"""Format XML using HTML rules."""
|
||||
def substitute(self, *args, **kwargs):
|
||||
return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
|
||||
|
||||
|
||||
class PageElement(object):
|
||||
"""Contains the navigational information for some part of the page
|
||||
(either a tag or a piece of text)"""
|
||||
|
@ -169,49 +131,40 @@ class PageElement(object):
|
|||
# to methods like encode() and prettify():
|
||||
#
|
||||
# "html" - All Unicode characters with corresponding HTML entities
|
||||
# are converted to those entities on output.
|
||||
# "html5" - The same as "html", but empty void tags are represented as
|
||||
# <tag> rather than <tag/>
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# are converted to those entities on output.
|
||||
# "minimal" - Bare ampersands and angle brackets are converted to
|
||||
# XML entities: &amp; &lt; &gt;
|
||||
# None - The null formatter. Unicode characters are never
|
||||
# converted to entities. This is not recommended, but it's
|
||||
# faster than "minimal".
|
||||
# A callable function - it will be called on every string that needs to undergo entity substitution.
|
||||
# A Formatter instance - Formatter.substitute(string) will be called on every string that
|
||||
# A function - This function will be called on every string that
|
||||
# needs to undergo entity substitution.
|
||||
#
|
||||
|
||||
# In an HTML document, the default "html", "html5", and "minimal"
|
||||
# functions will leave the contents of <script> and <style> tags
|
||||
# alone. For an XML document, all tags will be given the same
|
||||
# treatment.
|
||||
# In an HTML document, the default "html" and "minimal" functions
|
||||
# will leave the contents of <script> and <style> tags alone. For
|
||||
# an XML document, all tags will be given the same treatment.
|
||||
|
||||
HTML_FORMATTERS = {
|
||||
"html" : HTMLFormatter(),
|
||||
"html5" : HTML5Formatter(),
|
||||
"minimal" : MinimalHTMLFormatter(),
|
||||
"html" : HTMLAwareEntitySubstitution.substitute_html,
|
||||
"minimal" : HTMLAwareEntitySubstitution.substitute_xml,
|
||||
None : None
|
||||
}
|
||||
|
||||
XML_FORMATTERS = {
|
||||
"html" : HTMLXMLFormatter(),
|
||||
"minimal" : XMLFormatter(),
|
||||
"html" : EntitySubstitution.substitute_html,
|
||||
"minimal" : EntitySubstitution.substitute_xml,
|
||||
None : None
|
||||
}
|
||||
|
||||
def format_string(self, s, formatter='minimal'):
|
||||
"""Format the given string using the given formatter."""
|
||||
if isinstance(formatter, basestring):
|
||||
if not callable(formatter):
|
||||
formatter = self._formatter_for_name(formatter)
|
||||
if formatter is None:
|
||||
output = s
|
||||
else:
|
||||
if callable(formatter):
|
||||
# Backwards compatibility -- you used to pass in a formatting method.
|
||||
output = formatter(s)
|
||||
else:
|
||||
output = formatter.substitute(s)
|
||||
output = formatter(s)
|
||||
return output
|
||||
|
||||
@property
|
||||
|
@ -241,9 +194,11 @@ class PageElement(object):
|
|||
def _formatter_for_name(self, name):
|
||||
"Look up a formatter function based on its name and the tree."
|
||||
if self._is_xml:
|
||||
return self.XML_FORMATTERS.get(name, XMLFormatter())
|
||||
return self.XML_FORMATTERS.get(
|
||||
name, EntitySubstitution.substitute_xml)
|
||||
else:
|
||||
return self.HTML_FORMATTERS.get(name, HTMLFormatter())
|
||||
return self.HTML_FORMATTERS.get(
|
||||
name, HTMLAwareEntitySubstitution.substitute_xml)
|
||||
|
||||
def setup(self, parent=None, previous_element=None, next_element=None,
|
||||
previous_sibling=None, next_sibling=None):
|
||||
|
@ -361,14 +316,6 @@ class PageElement(object):
|
|||
and not isinstance(new_child, NavigableString)):
|
||||
new_child = NavigableString(new_child)
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
if isinstance(new_child, BeautifulSoup):
|
||||
# We don't want to end up with a situation where one BeautifulSoup
|
||||
# object contains another. Insert the children one at a time.
|
||||
for subchild in list(new_child.contents):
|
||||
self.insert(position, subchild)
|
||||
position += 1
|
||||
return
|
||||
position = min(position, len(self.contents))
|
||||
if hasattr(new_child, 'parent') and new_child.parent is not None:
|
||||
# We're 'inserting' an element that's already one
|
||||
|
@ -589,21 +536,14 @@ class PageElement(object):
|
|||
elif isinstance(name, basestring):
|
||||
# Optimization to find all tags with a given name.
|
||||
if name.count(':') == 1:
|
||||
# This is a name with a prefix. If this is a namespace-aware document,
|
||||
# we need to match the local name against tag.name. If not,
|
||||
# we need to match the fully-qualified name against tag.name.
|
||||
prefix, local_name = name.split(':', 1)
|
||||
# This is a name with a prefix.
|
||||
prefix, name = name.split(':', 1)
|
||||
else:
|
||||
prefix = None
|
||||
local_name = name
|
||||
result = (element for element in generator
|
||||
if isinstance(element, Tag)
|
||||
and (
|
||||
element.name == name
|
||||
) or (
|
||||
element.name == local_name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
and element.name == name
|
||||
and (prefix is None or element.prefix == prefix)
|
||||
)
|
||||
return ResultSet(strainer, result)
|
||||
results = ResultSet(strainer)
|
||||
|
@ -922,7 +862,7 @@ class Tag(PageElement):
|
|||
self.can_be_empty_element = builder.can_be_empty_element(name)
|
||||
else:
|
||||
self.can_be_empty_element = False
|
||||
|
||||
|
||||
parserClass = _alias("parser_class") # BS3
|
||||
|
||||
def __copy__(self):
|
||||
|
@ -1106,10 +1046,8 @@ class Tag(PageElement):
|
|||
# BS3: soup.aTag -> soup.find("a")
|
||||
tag_name = tag[:-3]
|
||||
warnings.warn(
|
||||
'.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
|
||||
name=tag_name
|
||||
)
|
||||
)
|
||||
'.%sTag is deprecated, use .find("%s") instead.' % (
|
||||
tag_name, tag_name))
|
||||
return self.find(tag_name)
|
||||
# We special case contents to avoid recursion.
|
||||
elif not tag.startswith("__") and not tag == "contents":
|
||||
|
@ -1191,10 +1129,11 @@ class Tag(PageElement):
|
|||
encoding.
|
||||
"""
|
||||
|
||||
# First off, turn a string formatter into a Formatter object. This
|
||||
# First off, turn a string formatter into a function. This
|
||||
# will stop the lookup from happening over and over again.
|
||||
if not isinstance(formatter, Formatter) and not callable(formatter):
|
||||
if not callable(formatter):
|
||||
formatter = self._formatter_for_name(formatter)
|
||||
|
||||
attrs = []
|
||||
if self.attrs:
|
||||
for key, val in sorted(self.attrs.items()):
|
||||
|
@ -1223,9 +1162,7 @@ class Tag(PageElement):
|
|||
prefix = self.prefix + ":"
|
||||
|
||||
if self.is_empty_element:
|
||||
close = ''
|
||||
if isinstance(formatter, Formatter):
|
||||
close = formatter.void_element_close_prefix or close
|
||||
close = '/'
|
||||
else:
|
||||
closeTag = '</%s%s>' % (prefix, self.name)
|
||||
|
||||
|
@ -1296,9 +1233,9 @@ class Tag(PageElement):
|
|||
:param formatter: The output formatter responsible for converting
|
||||
entities to Unicode characters.
|
||||
"""
|
||||
# First off, turn a string formatter into a Formatter object. This
|
||||
# First off, turn a string formatter into a function. This
|
||||
# will stop the lookup from happening over and over again.
|
||||
if not isinstance(formatter, Formatter) and not callable(formatter):
|
||||
if not callable(formatter):
|
||||
formatter = self._formatter_for_name(formatter)
|
||||
|
||||
pretty_print = (indent_level is not None)
|
||||
|
@ -1411,29 +1348,15 @@ class Tag(PageElement):
|
|||
# Handle grouping selectors if ',' exists, ie: p,a
|
||||
if ',' in selector:
|
||||
context = []
|
||||
selectors = [x.strip() for x in selector.split(",")]
|
||||
|
||||
# If a selector is mentioned multiple times we don't want
|
||||
# to use it more than once.
|
||||
used_selectors = set()
|
||||
|
||||
# We also don't want to select the same element more than once,
|
||||
# if it's matched by multiple selectors.
|
||||
selected_object_ids = set()
|
||||
for partial_selector in selectors:
|
||||
for partial_selector in selector.split(','):
|
||||
partial_selector = partial_selector.strip()
|
||||
if partial_selector == '':
|
||||
raise ValueError('Invalid group selection syntax: %s' % selector)
|
||||
if partial_selector in used_selectors:
|
||||
continue
|
||||
used_selectors.add(partial_selector)
|
||||
candidates = self.select(partial_selector, limit=limit)
|
||||
for candidate in candidates:
|
||||
# This lets us distinguish between distinct tags that
|
||||
# represent the same markup.
|
||||
object_id = id(candidate)
|
||||
if object_id not in selected_object_ids:
|
||||
if candidate not in context:
|
||||
context.append(candidate)
|
||||
selected_object_ids.add(object_id)
|
||||
|
||||
if limit and len(context) >= limit:
|
||||
break
|
||||
return context
|
||||
|
@ -1495,7 +1418,7 @@ class Tag(PageElement):
|
|||
if tag_name == '':
|
||||
raise ValueError(
|
||||
"A pseudo-class must be prefixed with a tag name.")
|
||||
pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
|
||||
pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
|
||||
found = []
|
||||
if pseudo_attributes is None:
|
||||
pseudo_type = pseudo
|
||||
|
@ -1729,7 +1652,7 @@ class SoupStrainer(object):
|
|||
markup = markup_name
|
||||
markup_attrs = markup
|
||||
call_function_with_tag_data = (
|
||||
isinstance(self.name, Callable)
|
||||
isinstance(self.name, collections.Callable)
|
||||
and not isinstance(markup_name, Tag))
|
||||
|
||||
if ((not self.name)
|
||||
|
@ -1809,7 +1732,7 @@ class SoupStrainer(object):
|
|||
# True matches any non-None value.
|
||||
return markup is not None
|
||||
|
||||
if isinstance(match_against, Callable):
|
||||
if isinstance(match_against, collections.Callable):
|
||||
return match_against(markup)
|
||||
|
||||
# Custom callables take the tag as an argument, but all
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
# encoding: utf-8
|
||||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
|
@ -151,14 +150,6 @@ class HTMLTreeBuilderSmokeTest(object):
|
|||
soup.encode("utf-8").replace(b"\n", b""),
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_namespaced_html(self):
|
||||
"""When a namespaced XML document is parsed as HTML it should
|
||||
be treated as HTML with weird tag names.
|
||||
"""
|
||||
markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(2, len(soup.find_all("ns1:foo")))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
|
@ -320,26 +311,6 @@ Hello, world!
|
|||
def test_angle_brackets_in_attribute_values_are_escaped(self):
|
||||
self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>')
|
||||
|
||||
def test_strings_resembling_character_entity_references(self):
|
||||
# "&T" and "&p" look like incomplete character entities, but they are
|
||||
# not.
|
||||
self.assertSoupEquals(
|
||||
u"<p>• AT&T is in the s&p 500</p>",
|
||||
u"<p>\u2022 AT&T is in the s&p 500</p>"
|
||||
)
|
||||
|
||||
def test_entities_in_foreign_document_encoding(self):
|
||||
# &#147; and &#148; are invalid numeric entities referencing
|
||||
# Windows-1252 characters. &#45; references a character common
|
||||
# to Windows-1252 and Unicode, and &#9731; references a
|
||||
# character only found in Unicode.
|
||||
#
|
||||
# All of these entities should be converted to Unicode
|
||||
# characters.
|
||||
markup = "<p>“Hello” -☃</p>"
|
||||
soup = self.soup(markup)
|
||||
self.assertEquals(u"“Hello” -☃", soup.p.string)
|
||||
|
||||
def test_entities_in_attributes_converted_to_unicode(self):
|
||||
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
|
@ -363,7 +334,7 @@ Hello, world!
|
|||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
|
||||
|
||||
def test_multipart_strings(self):
|
||||
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
|
||||
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
|
||||
|
@ -653,17 +624,6 @@ class XMLTreeBuilderSmokeTest(object):
|
|||
self.assertEqual(
|
||||
soup.encode("utf-8"), markup)
|
||||
|
||||
def test_nested_namespaces(self):
|
||||
doc = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<parent xmlns="http://ns1/">
|
||||
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
|
||||
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
|
||||
</child>
|
||||
</parent>"""
|
||||
soup = self.soup(doc)
|
||||
self.assertEqual(doc, soup.encode())
|
||||
|
||||
def test_formatter_processes_script_tag_for_xml_documents(self):
|
||||
doc = """
|
||||
<script type="text/javascript">
|
||||
|
|
|
@ -5,7 +5,6 @@ from pdb import set_trace
|
|||
import pickle
|
||||
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
|
||||
|
||||
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
|
||||
|
@ -33,17 +32,3 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
|||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
||||
|
||||
def test_empty_element(self):
|
||||
# This verifies that any buffered data present when the parser
|
||||
# finishes working is handled.
|
||||
self.assertSoupEquals("foo &# bar", "foo &# bar")
|
||||
|
||||
|
||||
class TestHTMLParserSubclass(SoupTest):
|
||||
def test_error(self):
|
||||
"""Verify that our HTMLParser subclass implements error() in a way
|
||||
that doesn't cause a crash.
|
||||
"""
|
||||
parser = BeautifulSoupHTMLParser()
|
||||
parser.error("don't crash")
|
||||
|
|
|
@ -46,12 +46,6 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
|||
self.assertSoupEquals(
|
||||
"<p>foo�bar</p>", "<p>foobar</p>")
|
||||
|
||||
def test_entities_in_foreign_document_encoding(self):
|
||||
# We can't implement this case correctly because by the time we
|
||||
# hear about markup like "“", it's been (incorrectly) converted into
|
||||
# a string like u'\x93'
|
||||
pass
|
||||
|
||||
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
|
||||
# test if an old version of lxml is installed.
|
||||
|
||||
|
|
|
@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
|
|||
</html>'''
|
||||
# All that whitespace looks good but makes the tests more
|
||||
# difficult. Get rid of it.
|
||||
markup = re.compile(r"\n\s*").sub("", markup)
|
||||
markup = re.compile("\n\s*").sub("", markup)
|
||||
self.tree = self.soup(markup)
|
||||
|
||||
|
||||
|
@ -703,12 +703,12 @@ class TestTagCreation(SoupTest):
|
|||
"""Test the ability to create new tags."""
|
||||
def test_new_tag(self):
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"})
|
||||
new_tag = soup.new_tag("foo", bar="baz")
|
||||
self.assertTrue(isinstance(new_tag, Tag))
|
||||
self.assertEqual("foo", new_tag.name)
|
||||
self.assertEqual(dict(bar="baz", name="a name"), new_tag.attrs)
|
||||
self.assertEqual(dict(bar="baz"), new_tag.attrs)
|
||||
self.assertEqual(None, new_tag.parent)
|
||||
|
||||
|
||||
def test_tag_inherits_self_closing_rules_from_builder(self):
|
||||
if XML_BUILDER_PRESENT:
|
||||
xml_soup = BeautifulSoup("", "lxml-xml")
|
||||
|
@ -821,26 +821,6 @@ class TestTreeModification(SoupTest):
|
|||
soup = self.soup(text)
|
||||
self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
|
||||
|
||||
def test_insert_beautifulsoup_object_inserts_children(self):
|
||||
"""Inserting one BeautifulSoup object into another actually inserts all
|
||||
of its children -- you'll never combine BeautifulSoup objects.
|
||||
"""
|
||||
soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
|
||||
|
||||
text = "<p>p2</p><p>p3</p>"
|
||||
to_insert = self.soup(text)
|
||||
soup.insert(1, to_insert)
|
||||
|
||||
for i in soup.descendants:
|
||||
assert not isinstance(i, BeautifulSoup)
|
||||
|
||||
p1, p2, p3, p4 = list(soup.children)
|
||||
self.assertEquals("And now, a word:", p1.string)
|
||||
self.assertEquals("p2", p2.string)
|
||||
self.assertEquals("p3", p3.string)
|
||||
self.assertEquals("And we're back.", p4.string)
|
||||
|
||||
|
||||
def test_replace_with_maintains_next_element_throughout(self):
|
||||
soup = self.soup('<p><a>one</a><b>three</b></p>')
|
||||
a = soup.a
|
||||
|
@ -1206,7 +1186,7 @@ class TestElementObjects(SoupTest):
|
|||
tag = soup.bTag
|
||||
self.assertEqual(soup.b, tag)
|
||||
self.assertEqual(
|
||||
'.bTag is deprecated, use .find("b") instead. If you really were looking for a tag called bTag, use .find("bTag")',
|
||||
'.bTag is deprecated, use .find("b") instead.',
|
||||
str(w[0].message))
|
||||
|
||||
def test_has_attr(self):
|
||||
|
@ -1439,21 +1419,13 @@ class TestSubstitutions(SoupTest):
|
|||
u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
|
||||
|
||||
def test_formatter_html(self):
|
||||
markup = u"<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
|
||||
markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
|
||||
soup = self.soup(markup)
|
||||
decoded = soup.decode(formatter="html")
|
||||
self.assertEqual(
|
||||
decoded,
|
||||
self.document_for("<br/><b><<Sacré bleu!>></b>"))
|
||||
self.document_for("<b><<Sacré bleu!>></b>"))
|
||||
|
||||
def test_formatter_html5(self):
|
||||
markup = u"<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
|
||||
soup = self.soup(markup)
|
||||
decoded = soup.decode(formatter="html5")
|
||||
self.assertEqual(
|
||||
decoded,
|
||||
self.document_for("<br><b><<Sacré bleu!>></b>"))
|
||||
|
||||
def test_formatter_minimal(self):
|
||||
markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"
|
||||
soup = self.soup(markup)
|
||||
|
@ -1474,14 +1446,14 @@ class TestSubstitutions(SoupTest):
|
|||
self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
|
||||
|
||||
def test_formatter_custom(self):
|
||||
markup = u"<b><foo></b><b>bar</b><br/>"
|
||||
markup = u"<b><foo></b><b>bar</b>"
|
||||
soup = self.soup(markup)
|
||||
decoded = soup.decode(formatter = lambda x: x.upper())
|
||||
# Instead of normal entity conversion code, the custom
|
||||
# callable is called on every string.
|
||||
self.assertEqual(
|
||||
decoded,
|
||||
self.document_for(u"<b><FOO></b><b>BAR</b><br>"))
|
||||
self.document_for(u"<b><FOO></b><b>BAR</b>"))
|
||||
|
||||
def test_formatter_is_run_on_attribute_values(self):
|
||||
markup = u'<a href="http://a.com?a=b&c=é">e</a>'
|
||||
|
@ -1526,7 +1498,7 @@ class TestSubstitutions(SoupTest):
|
|||
u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
|
||||
soup.div.prettify())
|
||||
|
||||
def test_prettify_accepts_formatter_function(self):
|
||||
def test_prettify_accepts_formatter(self):
|
||||
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
|
||||
pretty = soup.prettify(formatter = lambda x: x.upper())
|
||||
self.assertTrue("FOO" in pretty)
|
||||
|
@ -2074,17 +2046,5 @@ class TestSoupSelector(TreeTest):
|
|||
def test_multiple_select_nested(self):
|
||||
self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
|
||||
|
||||
def test_select_duplicate_elements(self):
|
||||
# When markup contains duplicate elements, a multiple select
|
||||
# will find all of them.
|
||||
markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
|
||||
soup = BeautifulSoup(markup, 'html.parser')
|
||||
selected = soup.select(".c1, .c2")
|
||||
self.assertEquals(3, len(selected))
|
||||
|
||||
# Verify that find_all finds the same elements, though because
|
||||
# of an implementation detail it finds them in a different
|
||||
# order.
|
||||
for element in soup.find_all(class_=['c1', 'c2']):
|
||||
assert element in selected
|
||||
|
||||
|
|
|
@ -1,6 +1,21 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import platform
|
||||
is_windows_special_path = False
|
||||
|
||||
if platform.system() == "Windows":
|
||||
try:
|
||||
__file__.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
is_windows_special_path = True
|
||||
|
||||
if not is_windows_special_path:
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
else:
|
||||
ThreadPoolExecutor = object
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import io
|
||||
import itertools
|
||||
|
@ -388,7 +403,7 @@ def search_external_subtitles(path, directory=None):
|
|||
subtitles = {}
|
||||
for p in os.listdir(directory or dirpath):
|
||||
# keep only valid subtitle filenames
|
||||
if not p.startswith(fileroot) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
|
||||
if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
|
||||
continue
|
||||
|
||||
# extract the potential language code
|
||||
|
@ -420,7 +435,7 @@ def scan_video(path):
|
|||
raise ValueError('Path does not exist')
|
||||
|
||||
# check video extension
|
||||
if not path.lower().endswith(VIDEO_EXTENSIONS):
|
||||
if not path.endswith(VIDEO_EXTENSIONS):
|
||||
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
|
||||
|
||||
dirpath, filename = os.path.split(path)
|
||||
|
@ -468,7 +483,7 @@ def scan_archive(path):
|
|||
rar = RarFile(path)
|
||||
|
||||
# filter on video extensions
|
||||
rar_filenames = [f for f in rar.namelist() if f.lower().endswith(VIDEO_EXTENSIONS)]
|
||||
rar_filenames = [f for f in rar.namelist() if f.endswith(VIDEO_EXTENSIONS)]
|
||||
|
||||
# no video found
|
||||
if not rar_filenames:
|
||||
|
@ -521,26 +536,17 @@ def scan_videos(path, age=None, archives=True):
|
|||
if dirname.startswith('.'):
|
||||
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
|
||||
dirnames.remove(dirname)
|
||||
# Skip Sample folder
|
||||
if dirname.lower() == 'sample':
|
||||
logger.debug('Skipping sample dirname %r in %r', dirname, dirpath)
|
||||
dirnames.remove(dirname)
|
||||
|
||||
# scan for videos
|
||||
for filename in filenames:
|
||||
# filter on videos and archives
|
||||
if not (filename.lower().endswith(VIDEO_EXTENSIONS) or
|
||||
archives and filename.lower().endswith(ARCHIVE_EXTENSIONS)):
|
||||
if not (filename.endswith(VIDEO_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
|
||||
continue
|
||||
|
||||
# skip hidden files
|
||||
if filename.startswith('.'):
|
||||
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
|
||||
continue
|
||||
# skip 'sample' media files
|
||||
if os.path.splitext(filename)[0].lower() == 'sample':
|
||||
logger.debug('Skipping sample filename %r in %r', filename, dirpath)
|
||||
continue
|
||||
|
||||
# reconstruct the file path
|
||||
filepath = os.path.join(dirpath, filename)
|
||||
|
@ -562,13 +568,13 @@ def scan_videos(path, age=None, archives=True):
|
|||
continue
|
||||
|
||||
# scan
|
||||
if filename.lower().endswith(VIDEO_EXTENSIONS): # video
|
||||
if filename.endswith(VIDEO_EXTENSIONS): # video
|
||||
try:
|
||||
video = scan_video(filepath)
|
||||
except ValueError: # pragma: no cover
|
||||
logger.exception('Error scanning video')
|
||||
continue
|
||||
elif archives and filename.lower().endswith(ARCHIVE_EXTENSIONS): # archive
|
||||
elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive
|
||||
try:
|
||||
video = scan_archive(filepath)
|
||||
except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
|
||||
|
|
|
@ -179,7 +179,7 @@ class Addic7edProvider(Provider):
|
|||
|
||||
# make the search
|
||||
logger.info('Searching show ids with %r', params)
|
||||
r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
|
||||
r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
|
||||
r.raise_for_status()
|
||||
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
|
||||
|
||||
|
|
|
@ -170,7 +170,7 @@ class LegendasTVProvider(Provider):
|
|||
|
||||
# Provider needs UNRAR installed. If not available raise ConfigurationError
|
||||
try:
|
||||
rarfile.custom_check([rarfile.UNRAR_TOOL], True)
|
||||
rarfile.custom_check(rarfile.UNRAR_TOOL)
|
||||
except rarfile.RarExecError:
|
||||
raise ConfigurationError('UNRAR tool not available')
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ from datetime import datetime, timedelta
|
|||
from functools import wraps
|
||||
import logging
|
||||
import re
|
||||
|
||||
import _strptime
|
||||
import requests
|
||||
|
||||
from .. import __short_version__
|
||||
|
|
|
@ -6,8 +6,6 @@ import os
|
|||
import chardet
|
||||
import pysrt
|
||||
|
||||
import types
|
||||
|
||||
from .score import get_equivalent_release_groups
|
||||
from .video import Episode, Movie
|
||||
from .utils import sanitize, sanitize_release_group
|
||||
|
@ -232,39 +230,16 @@ def guess_matches(video, guess, partial=False):
|
|||
if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
|
||||
matches.add('title')
|
||||
# release_group
|
||||
if 'release_group' in guess:
|
||||
release_groups = guess["release_group"]
|
||||
if not isinstance(release_groups, types.ListType):
|
||||
release_groups = [release_groups]
|
||||
|
||||
if video.release_group:
|
||||
for release_group in release_groups:
|
||||
if (sanitize_release_group(release_group) in
|
||||
get_equivalent_release_groups(sanitize_release_group(video.release_group))):
|
||||
matches.add('release_group')
|
||||
break
|
||||
if (video.release_group and 'release_group' in guess and
|
||||
sanitize_release_group(guess['release_group']) in
|
||||
get_equivalent_release_groups(sanitize_release_group(video.release_group))):
|
||||
matches.add('release_group')
|
||||
# resolution
|
||||
if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
|
||||
matches.add('resolution')
|
||||
# format
|
||||
if 'format' in guess:
|
||||
formats = guess["format"]
|
||||
if not isinstance(formats, types.ListType):
|
||||
formats = [formats]
|
||||
|
||||
if video.format:
|
||||
video_format = video.format
|
||||
if video_format in ("HDTV", "SDTV", "TV"):
|
||||
video_format = "TV"
|
||||
logger.debug("Treating HDTV/SDTV the same")
|
||||
|
||||
for frmt in formats:
|
||||
if frmt in ("HDTV", "SDTV"):
|
||||
frmt = "TV"
|
||||
|
||||
if frmt.lower() == video_format.lower():
|
||||
matches.add('format')
|
||||
break
|
||||
if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
|
||||
matches.add('format')
|
||||
# video_codec
|
||||
if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
|
||||
matches.add('video_codec')
|
||||
|
|
Loading…
Reference in a new issue