bazarr/libs/smmap/util.py
2018-09-16 20:33:04 -04:00

276 lines
9.1 KiB
Python

"""Module containing a memory memory manager which provides a sliding window on a number of memory mapped files"""
import os
import sys
from mmap import mmap, ACCESS_READ
try:
from mmap import ALLOCATIONGRANULARITY
except ImportError:
# in python pre 2.6, the ALLOCATIONGRANULARITY does not exist as it is mainly
# useful for aligning the offset. The offset argument doesn't exist there though
from mmap import PAGESIZE as ALLOCATIONGRANULARITY
# END handle pythons missing quality assurance
__all__ = ["align_to_mmap", "is_64_bit", "buffer",
"MapWindow", "MapRegion", "MapRegionList", "ALLOCATIONGRANULARITY"]
#{ Utilities
try:
# Python 2
buffer = buffer
except NameError:
# Python 3 has no `buffer`; only `memoryview`
def buffer(obj, offset, size):
# Actually, for gitpython this is fastest ... .
return memoryview(obj)[offset:offset+size]
# doing it directly is much faster !
# return obj[offset:offset + size]
def string_types():
if sys.version_info[0] >= 3:
return str
else:
return basestring
def align_to_mmap(num, round_up):
"""
Align the given integer number to the closest page offset, which usually is 4096 bytes.
:param round_up: if True, the next higher multiple of page size is used, otherwise
the lower page_size will be used (i.e. if True, 1 becomes 4096, otherwise it becomes 0)
:return: num rounded to closest page"""
res = (num // ALLOCATIONGRANULARITY) * ALLOCATIONGRANULARITY
if round_up and (res != num):
res += ALLOCATIONGRANULARITY
# END handle size
return res
def is_64_bit():
""":return: True if the system is 64 bit. Otherwise it can be assumed to be 32 bit"""
return sys.maxsize > (1 << 32) - 1
#}END utilities
#{ Utility Classes
class MapWindow(object):
"""Utility type which is used to snap windows towards each other, and to adjust their size"""
__slots__ = (
'ofs', # offset into the file in bytes
'size' # size of the window in bytes
)
def __init__(self, offset, size):
self.ofs = offset
self.size = size
def __repr__(self):
return "MapWindow(%i, %i)" % (self.ofs, self.size)
@classmethod
def from_region(cls, region):
""":return: new window from a region"""
return cls(region._b, region.size())
def ofs_end(self):
return self.ofs + self.size
def align(self):
"""Assures the previous window area is contained in the new one"""
nofs = align_to_mmap(self.ofs, 0)
self.size += self.ofs - nofs # keep size constant
self.ofs = nofs
self.size = align_to_mmap(self.size, 1)
def extend_left_to(self, window, max_size):
"""Adjust the offset to start where the given window on our left ends if possible,
but don't make yourself larger than max_size.
The resize will assure that the new window still contains the old window area"""
rofs = self.ofs - window.ofs_end()
nsize = rofs + self.size
rofs -= nsize - min(nsize, max_size)
self.ofs = self.ofs - rofs
self.size += rofs
def extend_right_to(self, window, max_size):
"""Adjust the size to make our window end where the right window begins, but don't
get larger than max_size"""
self.size = min(self.size + (window.ofs - self.ofs_end()), max_size)
class MapRegion(object):
"""Defines a mapped region of memory, aligned to pagesizes
**Note:** deallocates used region automatically on destruction"""
__slots__ = [
'_b', # beginning of mapping
'_mf', # mapped memory chunk (as returned by mmap)
'_uc', # total amount of usages
'_size', # cached size of our memory map
'__weakref__'
]
_need_compat_layer = sys.version_info[:2] < (2, 6)
if _need_compat_layer:
__slots__.append('_mfb') # mapped memory buffer to provide offset
# END handle additional slot
#{ Configuration
#} END configuration
def __init__(self, path_or_fd, ofs, size, flags=0):
"""Initialize a region, allocate the memory map
:param path_or_fd: path to the file to map, or the opened file descriptor
:param ofs: **aligned** offset into the file to be mapped
:param size: if size is larger then the file on disk, the whole file will be
allocated the the size automatically adjusted
:param flags: additional flags to be given when opening the file.
:raise Exception: if no memory can be allocated"""
self._b = ofs
self._size = 0
self._uc = 0
if isinstance(path_or_fd, int):
fd = path_or_fd
else:
fd = os.open(path_or_fd, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags)
# END handle fd
try:
kwargs = dict(access=ACCESS_READ, offset=ofs)
corrected_size = size
sizeofs = ofs
if self._need_compat_layer:
del(kwargs['offset'])
corrected_size += ofs
sizeofs = 0
# END handle python not supporting offset ! Arg
# have to correct size, otherwise (instead of the c version) it will
# bark that the size is too large ... many extra file accesses because
# if this ... argh !
actual_size = min(os.fstat(fd).st_size - sizeofs, corrected_size)
self._mf = mmap(fd, actual_size, **kwargs)
# END handle memory mode
self._size = len(self._mf)
if self._need_compat_layer:
self._mfb = buffer(self._mf, ofs, self._size)
# END handle buffer wrapping
finally:
if isinstance(path_or_fd, string_types()):
os.close(fd)
# END only close it if we opened it
# END close file handle
# We assume the first one to use us keeps us around
self.increment_client_count()
def __repr__(self):
return "MapRegion<%i, %i>" % (self._b, self.size())
#{ Interface
def buffer(self):
""":return: a buffer containing the memory"""
return self._mf
def map(self):
""":return: a memory map containing the memory"""
return self._mf
def ofs_begin(self):
""":return: absolute byte offset to the first byte of the mapping"""
return self._b
def size(self):
""":return: total size of the mapped region in bytes"""
return self._size
def ofs_end(self):
""":return: Absolute offset to one byte beyond the mapping into the file"""
return self._b + self._size
def includes_ofs(self, ofs):
""":return: True if the given offset can be read in our mapped region"""
return self._b <= ofs < self._b + self._size
def client_count(self):
""":return: number of clients currently using this region"""
return self._uc
def increment_client_count(self, ofs = 1):
"""Adjust the usage count by the given positive or negative offset.
If usage count equals 0, we will auto-release our resources
:return: True if we released resources, False otherwise. In the latter case, we can still be used"""
self._uc += ofs
assert self._uc > -1, "Increments must match decrements, usage counter negative: %i" % self._uc
if self.client_count() == 0:
self.release()
return True
else:
return False
# end handle release
def release(self):
"""Release all resources this instance might hold. Must only be called if there usage_count() is zero"""
self._mf.close()
# re-define all methods which need offset adjustments in compatibility mode
if _need_compat_layer:
def size(self):
return self._size - self._b
def ofs_end(self):
# always the size - we are as large as it gets
return self._size
def buffer(self):
return self._mfb
def includes_ofs(self, ofs):
return self._b <= ofs < self._size
# END handle compat layer
#} END interface
class MapRegionList(list):
"""List of MapRegion instances associating a path with a list of regions."""
__slots__ = (
'_path_or_fd', # path or file descriptor which is mapped by all our regions
'_file_size' # total size of the file we map
)
def __new__(cls, path):
return super(MapRegionList, cls).__new__(cls)
def __init__(self, path_or_fd):
self._path_or_fd = path_or_fd
self._file_size = None
def path_or_fd(self):
""":return: path or file descriptor we are attached to"""
return self._path_or_fd
def file_size(self):
""":return: size of file we manager"""
if self._file_size is None:
if isinstance(self._path_or_fd, string_types()):
self._file_size = os.stat(self._path_or_fd).st_size
else:
self._file_size = os.fstat(self._path_or_fd).st_size
# END handle path type
# END update file size
return self._file_size
#} END utility classes