bazarr/libs/pymediainfo/__init__.py
MoshiMoshi0 b56015e90b Add ability to use MediaInfo to refine video/audio properties (#479)
* Add ability to use MediaInfo to refine video/audio properties

* Remove pymediainfo from requirements.txt and add library files

* Look for .dll file if .exe was not found in get_binary

* Add pymediainfo to libs

* Switch to local MediaInfo library files

* Exit early if supported attributes are already set

* Log media info warnings to debug
2019-07-10 15:36:49 +02:00

320 lines
12 KiB
Python

# vim: set fileencoding=utf-8 :
import os
import re
import locale
import json
import ctypes
import sys
from pkg_resources import get_distribution, DistributionNotFound
import xml.etree.ElementTree as ET
try:
import pathlib
except ImportError:
pathlib = None
if sys.version_info < (3,):
import urlparse
else:
import urllib.parse as urlparse
# Expose the version of the "pymediainfo" distribution as ``__version__``.
try:
    __version__ = get_distribution("pymediainfo").version
except DistributionNotFound:
    # No distribution metadata was found for "pymediainfo":
    # ``__version__`` is simply left undefined in that case.
    pass
class Track(object):
    """
    An object associated with a media file track.

    Each :class:`Track` attribute corresponds to attributes parsed from MediaInfo's output.
    All attributes are lower case. Attributes that are present several times such as Duration
    yield a second attribute starting with `other_` which is a list of all alternative
    attribute values.

    When a non-existing attribute is accessed, `None` is returned.

    Example:

    >>> t = mi.tracks[0]
    >>> t
    <Track track_id='None', track_type='General'>
    >>> t.duration
    3000
    >>> t.to_data()["other_duration"]
    ['3 s 0 ms', '3 s 0 ms', '3 s 0 ms',
        '00:00:03.000', '00:00:03.000']
    >>> type(t.non_existing)
    NoneType

    All available attributes can be obtained by calling :func:`to_data`.
    """

    def __eq__(self, other):
        """Two tracks are equal when all of their parsed attributes are equal."""
        if not isinstance(other, Track):
            # Let Python fall back to its default comparison instead of
            # failing on objects that are not tracks.
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Inverse of :func:`__eq__` (Python 2 does not derive it automatically)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __getattribute__(self, name):
        """Return the attribute called `name`, or `None` when it does not exist."""
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            # Only a missing attribute maps to None; any other error propagates.
            return None

    def __getstate__(self):
        """Return the attribute dict used for pickling."""
        return self.__dict__

    def __setstate__(self, state):
        """Restore the attribute dict produced by :func:`__getstate__`."""
        self.__dict__ = state

    def __init__(self, xml_dom_fragment):
        """
        Build the track from a ``<track>`` XML element.

        :param xml_dom_fragment: element whose ``type`` attribute gives the
            track type and whose children each describe one attribute.
        :raises KeyError: if the element has no ``type`` attribute.
        """
        self.track_type = xml_dom_fragment.attrib['type']
        for el in xml_dom_fragment:
            node_name = el.tag.lower().strip().strip('_')
            if node_name == 'id':
                node_name = 'track_id'
            node_value = el.text
            other_node_name = "other_%s" % node_name
            # The first occurrence becomes the main attribute, any further
            # occurrence is collected in the matching "other_" list.
            if getattr(self, node_name) is None:
                setattr(self, node_name, node_value)
            elif getattr(self, other_node_name) is None:
                setattr(self, other_node_name, [node_value])
            else:
                getattr(self, other_node_name).append(node_value)

        prefix = 'other_'
        # Snapshot the names first: attributes are modified inside the loop.
        for other_name in [key for key in self.__dict__ if key.startswith(prefix)]:
            alternatives = getattr(self, other_name)
            if not isinstance(alternatives, list):
                # A tag literally named "other_..." is not a list of
                # alternative values, there is nothing to swap.
                continue
            # Strip the prefix only, an "other_" inside the name must be kept.
            primary = other_name[len(prefix):]
            try:
                # Attempt to convert the main value to int, which is the most
                # useful form for comparisons.
                setattr(self, primary, int(getattr(self, primary)))
            except (ValueError, TypeError):
                # Otherwise look for an alternative value that is an int
                # and swap it with the main value.
                for candidate in alternatives:
                    try:
                        as_int = int(candidate)
                    except (ValueError, TypeError):
                        continue
                    current = getattr(self, primary)
                    setattr(self, primary, as_int)
                    alternatives.append(current)
                    break

    def __repr__(self):
        return "<Track track_id='{}', track_type='{}'>".format(self.track_id, self.track_type)

    def to_data(self):
        """
        Returns a dict representation of the track attributes.

        Example:

        >>> sorted(track.to_data().keys())[:3]
        ['codec', 'codec_extensions_usually_used', 'codec_url']
        >>> t.to_data()["file_size"]
        5988

        :rtype: dict
        """
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != 'xml_dom_fragment'
        }
class MediaInfo(object):
    """
    An object containing information about a media file.

    :class:`MediaInfo` objects can be created by directly calling code from
    libmediainfo (in this case, the library must be present on the system):

    >>> pymediainfo.MediaInfo.parse("/path/to/file.mp4")

    Alternatively, objects may be created from MediaInfo's XML output.
    Such output can be obtained using the ``XML`` output format on versions older than v17.10
    and the ``OLDXML`` format on newer versions.

    Using such an XML file, we can create a :class:`MediaInfo` object:

    >>> with open("output.xml") as f:
    ...     mi = pymediainfo.MediaInfo(f.read())

    :param str xml: XML output obtained from MediaInfo.
    :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
        parameter before parsing `xml`.
    :raises xml.etree.ElementTree.ParseError: if passed invalid XML.
    :var tracks: A list of :py:class:`Track` objects which the media file contains.
        For instance:

        >>> mi = pymediainfo.MediaInfo.parse("/path/to/file.mp4")
        >>> for t in mi.tracks:
        ...     print(t)
        <Track track_id='None', track_type='General'>
        <Track track_id='1', track_type='Text'>
    """

    def __eq__(self, other):
        """Two objects are equal when their track lists are equal."""
        if not isinstance(other, MediaInfo):
            # Let Python fall back to its default comparison instead of
            # failing on objects that have no ``tracks`` attribute.
            return NotImplemented
        return self.tracks == other.tracks

    def __ne__(self, other):
        """Inverse of :func:`__eq__` (Python 2 does not derive it automatically)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __init__(self, xml, encoding_errors="strict"):
        xml_dom = ET.fromstring(xml.encode("utf-8", encoding_errors))
        # A "File" root element is the case for libmediainfo < 18.03
        # https://github.com/sbraz/pymediainfo/issues/57
        # https://github.com/MediaArea/MediaInfoLib/commit/575a9a32e6960ea34adb3bc982c64edfa06e95eb
        xpath = "track" if xml_dom.tag == "File" else "File/track"
        self.tracks = [Track(xml_track) for xml_track in xml_dom.iterfind(xpath)]

    @staticmethod
    def _get_library(library_file=None):
        """
        Load libmediainfo through ctypes and declare the prototypes that are used.

        :param str library_file: explicit path to the library. When `None`,
            the platform's usual file names are tried, preferring a copy
            located next to this module.
        :return: the loaded library object.
        :raises OSError: if none of the candidate files could be loaded.
        """
        os_is_nt = os.name in ("nt", "dos", "os2", "ce")
        # ctypes.WinDLL only exists on Windows, so it is only looked up there.
        lib_type = ctypes.WinDLL if os_is_nt else ctypes.CDLL

        if library_file is not None:
            library_names = (library_file,)
        else:
            if os_is_nt:
                library_names = ("MediaInfo.dll",)
            elif sys.platform == "darwin":
                library_names = ("libmediainfo.0.dylib", "libmediainfo.dylib")
            else:
                library_names = ("libmediainfo.so.0",)
            script_dir = os.path.dirname(__file__)
            # Look for the library file in the script folder
            for library in library_names:
                lib_path = os.path.join(script_dir, library)
                if os.path.isfile(lib_path):
                    # If we find it, don't try any other filename
                    library_names = (lib_path,)
                    break

        last_index = len(library_names)
        for index, library in enumerate(library_names, start=1):
            try:
                lib = lib_type(library)
                # Define arguments and return types
                lib.MediaInfo_New.argtypes = []
                lib.MediaInfo_New.restype = ctypes.c_void_p
                lib.MediaInfo_Option.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p, ctypes.c_wchar_p]
                lib.MediaInfo_Option.restype = ctypes.c_wchar_p
                lib.MediaInfo_Inform.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
                lib.MediaInfo_Inform.restype = ctypes.c_wchar_p
                lib.MediaInfo_Open.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p]
                lib.MediaInfo_Open.restype = ctypes.c_size_t
                lib.MediaInfo_Delete.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Delete.restype = None
                lib.MediaInfo_Close.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Close.restype = None
                return lib
            except OSError:
                # Only give up once all possible filenames have been tried
                if index == last_index:
                    raise

    @classmethod
    def can_parse(cls, library_file=None):
        """
        Checks whether media files can be analyzed using libmediainfo.

        :param str library_file: optional path to the libmediainfo library.
        :rtype: bool
        """
        try:
            cls._get_library(library_file)
        except Exception:
            # Any failure to load or set up the library means "cannot parse",
            # but interpreter-level signals (KeyboardInterrupt, SystemExit)
            # are no longer swallowed.
            return False
        return True

    @classmethod
    def parse(cls, filename, library_file=None, cover_data=False,
              encoding_errors="strict", parse_speed=0.5, text=False,
              full=True, legacy_stream_display=False):
        """
        Analyze a media file using libmediainfo.
        If libmediainfo is located in a non-standard location, the `library_file`
        parameter can be used:

        >>> pymediainfo.MediaInfo.parse("tests/data/sample.mkv",
        ...     library_file="/path/to/libmediainfo.dylib")

        :param filename: path to the media file which will be analyzed.
            A URL can also be used if libmediainfo was compiled
            with CURL support.
        :param str library_file: path to the libmediainfo library, this should only
            be used if the library cannot be auto-detected.
        :param bool cover_data: whether to retrieve cover data as base64.
        :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
            parameter before parsing MediaInfo's XML output.
        :param float parse_speed: passed to the library as `ParseSpeed`,
            this option takes values between 0 and 1.
            A higher value will yield more precise results in some cases
            but will also increase parsing time.
        :param bool text: if ``True``, MediaInfo's text output will be returned instead
            of a :class:`MediaInfo` object.
        :param bool full: display additional tags, including computer-readable values
            for sizes and durations.
        :param bool legacy_stream_display: display additional information about streams.
        :type filename: str or pathlib.Path
        :rtype: str if `text` is ``True``.
        :rtype: :class:`MediaInfo` otherwise.
        :raises FileNotFoundError: if passed a non-existent file
            (Python >= 3.3), does not work on Windows.
        :raises IOError: if passed a non-existent file (Python < 3.3),
            does not work on Windows.
        :raises RuntimeError: if parsing fails, this should not
            happen unless libmediainfo itself fails.
        """
        lib = cls._get_library(library_file)
        if pathlib is not None and isinstance(filename, pathlib.PurePath):
            filename = str(filename)
            url = False
        else:
            url = urlparse.urlparse(filename)
        # Try to open the file (if it's not a URL)
        # Doesn't work on Windows because paths are URLs
        if not (url and url.scheme):
            # Test whether the file is readable
            with open(filename, "rb"):
                pass
        # Obtain the library version
        version_info = lib.MediaInfo_Option(None, "Info_Version", "")
        version_str = re.search(r"^MediaInfoLib - v(\S+)", version_info).group(1)
        lib_version = tuple(int(part) for part in version_str.split("."))
        # The XML option was renamed starting with version 17.10
        xml_option = "OLDXML" if lib_version >= (17, 10) else "XML"
        # Cover_Data is not extracted by default since version 18.03
        # See https://github.com/MediaArea/MediaInfoLib/commit/d8fd88a1c282d1c09388c55ee0b46029e7330690
        if cover_data and lib_version >= (18, 3):
            lib.MediaInfo_Option(None, "Cover_Data", "base64")
        # Create a MediaInfo handle
        handle = lib.MediaInfo_New()
        try:
            lib.MediaInfo_Option(handle, "CharSet", "UTF-8")
            # Fix for https://github.com/sbraz/pymediainfo/issues/22
            # Python 2 does not change LC_CTYPE
            # at startup: https://bugs.python.org/issue6203
            if (sys.version_info < (3,) and os.name == "posix"
                    and locale.getlocale() == (None, None)):
                locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())
            lib.MediaInfo_Option(None, "Inform", "" if text else xml_option)
            lib.MediaInfo_Option(None, "Complete", "1" if full else "")
            lib.MediaInfo_Option(None, "ParseSpeed", str(parse_speed))
            lib.MediaInfo_Option(None, "LegacyStreamDisplay", "1" if legacy_stream_display else "")
            if lib.MediaInfo_Open(handle, filename) == 0:
                raise RuntimeError("An error occurred while opening {}"
                                   " with libmediainfo".format(filename))
            output = lib.MediaInfo_Inform(handle, 0)
        finally:
            # Always release the handle, including when opening failed
            lib.MediaInfo_Close(handle)
            lib.MediaInfo_Delete(handle)
        if text:
            return output
        return cls(output, encoding_errors)

    def to_data(self):
        """
        Returns a dict representation of the object's :py:class:`Tracks <Track>`.

        :rtype: dict
        """
        return {'tracks': [track.to_data() for track in self.tracks]}

    def to_json(self):
        """
        Returns a JSON representation of the object's :py:class:`Tracks <Track>`.

        :rtype: str
        """
        return json.dumps(self.to_data())