bazarr/libs/git/objects/commit.py

534 lines
20 KiB
Python
Raw Normal View History

# commit.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb import IStream
from git.util import (
hex_to_bin,
Actor,
Iterable,
Stats,
finalize_process
)
from git.diff import Diffable
from .tree import Tree
from . import base
from .util import (
Traversable,
Serializable,
parse_date,
altz_to_utctz_str,
parse_actor_and_date,
from_timestamp,
)
from git.compat import text_type
from time import (
time,
daylight,
altzone,
timezone,
localtime
)
import os
from io import BytesIO
import logging
log = logging.getLogger('git.objects.commit')
log.addHandler(logging.NullHandler())
__all__ = ('Commit', )
class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
"""Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
value on demand only if it involves calling the git binary."""
# ENVIRONMENT VARIABLES
# read when creating new commits
env_author_date = "GIT_AUTHOR_DATE"
env_committer_date = "GIT_COMMITTER_DATE"
# CONFIGURATION KEYS
conf_encoding = 'i18n.commitencoding'
# INVARIANTS
default_encoding = "UTF-8"
# object configuration
type = "commit"
__slots__ = ("tree",
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding", "gpgsig")
_id_attribute_ = "hexsha"
def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
committer=None, committed_date=None, committer_tz_offset=None,
message=None, parents=None, encoding=None, gpgsig=None):
"""Instantiate a new Commit. All keyword arguments taking None as default will
be implicitly set on first query.
:param binsha: 20 byte sha1
:param parents: tuple( Commit, ... )
is a tuple of commit ids or actual Commits
:param tree: Tree
Tree object
:param author: Actor
is the author string ( will be implicitly converted into an Actor object )
:param authored_date: int_seconds_since_epoch
is the authored DateTime - use time.gmtime() to convert it into a
different format
:param author_tz_offset: int_seconds_west_of_utc
is the timezone that the authored_date is in
:param committer: Actor
is the committer string
:param committed_date: int_seconds_since_epoch
is the committed DateTime - use time.gmtime() to convert it into a
different format
:param committer_tz_offset: int_seconds_west_of_utc
is the timezone that the committed_date is in
:param message: string
is the commit message
:param encoding: string
encoding of the message, defaults to UTF-8
:param parents:
List or tuple of Commit objects which are our parent(s) in the commit
dependency graph
:return: git.Commit
:note:
Timezone information is in the same format and in the same sign
as what time.altzone returns. The sign is inverted compared to git's
UTC timezone."""
super(Commit, self).__init__(repo, binsha)
if tree is not None:
assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
if tree is not None:
self.tree = tree
if author is not None:
self.author = author
if authored_date is not None:
self.authored_date = authored_date
if author_tz_offset is not None:
self.author_tz_offset = author_tz_offset
if committer is not None:
self.committer = committer
if committed_date is not None:
self.committed_date = committed_date
if committer_tz_offset is not None:
self.committer_tz_offset = committer_tz_offset
if message is not None:
self.message = message
if parents is not None:
self.parents = parents
if encoding is not None:
self.encoding = encoding
if gpgsig is not None:
self.gpgsig = gpgsig
@classmethod
def _get_intermediate_items(cls, commit):
return commit.parents
def _set_cache_(self, attr):
if attr in Commit.__slots__:
# read the data in a chunk, its faster - then provide a file wrapper
binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) # @UnusedVariable
self._deserialize(BytesIO(stream.read()))
else:
super(Commit, self)._set_cache_(attr)
# END handle attrs
@property
def authored_datetime(self):
return from_timestamp(self.authored_date, self.author_tz_offset)
@property
def committed_datetime(self):
return from_timestamp(self.committed_date, self.committer_tz_offset)
@property
def summary(self):
""":return: First line of the commit message"""
return self.message.split('\n', 1)[0]
def count(self, paths='', **kwargs):
"""Count the number of commits reachable from this commit
:param paths:
is an optional path or a list of paths restricting the return value
to commits actually containing the paths
:param kwargs:
Additional options to be passed to git-rev-list. They must not alter
the output style of the command, or parsing will yield incorrect results
:return: int defining the number of reachable commits"""
# yes, it makes a difference whether empty paths are given or not in our case
# as the empty paths version will ignore merge commits for some reason.
if paths:
return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines())
else:
return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines())
@property
def name_rev(self):
"""
:return:
String describing the commits hex sha based on the closest Reference.
Mostly useful for UI purposes"""
return self.repo.git.name_rev(self)
@classmethod
def iter_items(cls, repo, rev, paths='', **kwargs):
"""Find all commits matching the given criteria.
:param repo: is the Repo
:param rev: revision specifier, see git-rev-parse for viable options
:param paths:
is an optional path or list of paths, if set only Commits that include the path
or paths will be considered
:param kwargs:
optional keyword arguments to git rev-list where
``max_count`` is the maximum number of commits to fetch
``skip`` is the number of commits to skip
``since`` all commits since i.e. '1970-01-01'
:return: iterator yielding Commit items"""
if 'pretty' in kwargs:
raise ValueError("--pretty cannot be used as parsing expects single sha's only")
# END handle pretty
# use -- in any case, to prevent possibility of ambiguous arguments
# see https://github.com/gitpython-developers/GitPython/issues/264
args = ['--']
if paths:
args.extend((paths, ))
# END if paths
proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
return cls._iter_from_process_or_stream(repo, proc)
def iter_parents(self, paths='', **kwargs):
"""Iterate _all_ parents of this commit.
:param paths:
Optional path or list of paths limiting the Commits to those that
contain at least one of the paths
:param kwargs: All arguments allowed by git-rev-list
:return: Iterator yielding Commit objects which are parents of self """
# skip ourselves
skip = kwargs.get("skip", 1)
if skip == 0: # skip ourselves
skip = 1
kwargs['skip'] = skip
return self.iter_items(self.repo, self, paths, **kwargs)
@property
def stats(self):
"""Create a git stat from changes between this commit and its first parent
or from all changes done if this is the very first commit.
:return: git.Stats"""
if not self.parents:
text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
text2 = ""
for line in text.splitlines()[1:]:
(insertions, deletions, filename) = line.split("\t")
text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
text = text2
else:
text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
@classmethod
def _iter_from_process_or_stream(cls, repo, proc_or_stream):
"""Parse out commit information into a list of Commit objects
We expect one-line per commit, and parse the actual commit information directly
from our lighting fast object database
:param proc: git-rev-list process instance - one sha per line
:return: iterator returning Commit objects"""
stream = proc_or_stream
if not hasattr(stream, 'readline'):
stream = proc_or_stream.stdout
readline = stream.readline
while True:
line = readline()
if not line:
break
hexsha = line.strip()
if len(hexsha) > 40:
# split additional information, as returned by bisect for instance
hexsha, _ = line.split(None, 1)
# END handle extra info
assert len(hexsha) == 40, "Invalid line: %s" % hexsha
yield Commit(repo, hex_to_bin(hexsha))
# END for each line in stream
# TODO: Review this - it seems process handling got a bit out of control
# due to many developers trying to fix the open file handles issue
if hasattr(proc_or_stream, 'wait'):
finalize_process(proc_or_stream)
@classmethod
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False, author=None, committer=None,
author_date=None, commit_date=None):
"""Commit the given tree, creating a commit object.
:param repo: Repo object the commit should be part of
:param tree: Tree object or hex or bin sha
the tree of the new commit
:param message: Commit message. It may be an empty string if no message is provided.
It will be converted to a string in any case.
:param parent_commits:
Optional Commit objects to use as parents for the new commit.
If empty list, the commit will have no parents at all and become
a root commit.
If None , the current head commit will be the parent of the
new commit object
:param head:
If True, the HEAD will be advanced to the new commit automatically.
Else the HEAD will remain pointing on the previous commit. This could
lead to undesired results when diffing files.
:param author: The name of the author, optional. If unset, the repository
configuration is used to obtain this value.
:param committer: The name of the committer, optional. If unset, the
repository configuration is used to obtain this value.
:param author_date: The timestamp for the author field
:param commit_date: The timestamp for the committer field
:return: Commit object representing the new commit
:note:
Additional information about the committer and Author are taken from the
environment or from the git configuration, see git-commit-tree for
more information"""
if parent_commits is None:
try:
parent_commits = [repo.head.commit]
except ValueError:
# empty repositories have no head commit
parent_commits = []
# END handle parent commits
else:
for p in parent_commits:
if not isinstance(p, cls):
raise ValueError("Parent commit '%r' must be of type %s" % (p, cls))
# end check parent commit types
# END if parent commits are unset
# retrieve all additional information, create a commit object, and
# serialize it
# Generally:
# * Environment variables override configuration values
# * Sensible defaults are set according to the git documentation
# COMMITER AND AUTHOR INFO
cr = repo.config_reader()
env = os.environ
committer = committer or Actor.committer(cr)
author = author or Actor.author(cr)
# PARSE THE DATES
unix_time = int(time())
is_dst = daylight and localtime().tm_isdst > 0
offset = altzone if is_dst else timezone
author_date_str = env.get(cls.env_author_date, '')
if author_date:
author_time, author_offset = parse_date(author_date)
elif author_date_str:
author_time, author_offset = parse_date(author_date_str)
else:
author_time, author_offset = unix_time, offset
# END set author time
committer_date_str = env.get(cls.env_committer_date, '')
if commit_date:
committer_time, committer_offset = parse_date(commit_date)
elif committer_date_str:
committer_time, committer_offset = parse_date(committer_date_str)
else:
committer_time, committer_offset = unix_time, offset
# END set committer time
# assume utf8 encoding
enc_section, enc_option = cls.conf_encoding.split('.')
conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
# if the tree is no object, make sure we create one - otherwise
# the created commit object is invalid
if isinstance(tree, str):
tree = repo.tree(tree)
# END tree conversion
# CREATE NEW COMMIT
new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
author, author_time, author_offset,
committer, committer_time, committer_offset,
message, parent_commits, conf_encoding)
stream = BytesIO()
new_commit._serialize(stream)
streamlen = stream.tell()
stream.seek(0)
istream = repo.odb.store(IStream(cls.type, streamlen, stream))
new_commit.binsha = istream.binsha
if head:
# need late import here, importing git at the very beginning throws
# as well ...
import git.refs
try:
repo.head.set_commit(new_commit, logmsg=message)
except ValueError:
# head is not yet set to the ref our HEAD points to
# Happens on first commit
master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message)
repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
# END handle empty repositories
# END advance head handling
return new_commit
#{ Serializable Implementation
def _serialize(self, stream):
write = stream.write
write(("tree %s\n" % self.tree).encode('ascii'))
for p in self.parents:
write(("parent %s\n" % p).encode('ascii'))
a = self.author
aname = a.name
c = self.committer
fmt = "%s %s <%s> %s %s\n"
write((fmt % ("author", aname, a.email,
self.authored_date,
altz_to_utctz_str(self.author_tz_offset))).encode(self.encoding))
# encode committer
aname = c.name
write((fmt % ("committer", aname, c.email,
self.committed_date,
altz_to_utctz_str(self.committer_tz_offset))).encode(self.encoding))
if self.encoding != self.default_encoding:
write(("encoding %s\n" % self.encoding).encode('ascii'))
try:
if self.__getattribute__('gpgsig') is not None:
write(b"gpgsig")
for sigline in self.gpgsig.rstrip("\n").split("\n"):
write((" " + sigline + "\n").encode('ascii'))
except AttributeError:
pass
write(b"\n")
# write plain bytes, be sure its encoded according to our encoding
if isinstance(self.message, text_type):
write(self.message.encode(self.encoding))
else:
write(self.message)
# END handle encoding
return self
def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')
self.parents = []
next_line = None
while True:
parent_line = readline()
if not parent_line.startswith(b'parent'):
next_line = parent_line
break
# END abort reading parents
self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii'))))
# END for each parent line
self.parents = tuple(self.parents)
# we don't know actual author encoding before we have parsed it, so keep the lines around
author_line = next_line
committer_line = readline()
# we might run into one or more mergetag blocks, skip those for now
next_line = readline()
while next_line.startswith(b'mergetag '):
next_line = readline()
while next_line.startswith(b' '):
next_line = readline()
# end skip mergetags
# now we can have the encoding line, or an empty line followed by the optional
# message.
self.encoding = self.default_encoding
self.gpgsig = None
# read headers
enc = next_line
buf = enc.strip()
while buf:
if buf[0:10] == b"encoding ":
self.encoding = buf[buf.find(' ') + 1:].decode('ascii')
elif buf[0:7] == b"gpgsig ":
sig = buf[buf.find(b' ') + 1:] + b"\n"
is_next_header = False
while True:
sigbuf = readline()
if not sigbuf:
break
if sigbuf[0:1] != b" ":
buf = sigbuf.strip()
is_next_header = True
break
sig += sigbuf[1:]
# end read all signature
self.gpgsig = sig.rstrip(b"\n").decode('ascii')
if is_next_header:
continue
buf = readline().strip()
# decode the authors name
try:
self.author, self.authored_date, self.author_tz_offset = \
parse_actor_and_date(author_line.decode(self.encoding, 'replace'))
except UnicodeDecodeError:
log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding,
exc_info=True)
try:
self.committer, self.committed_date, self.committer_tz_offset = \
parse_actor_and_date(committer_line.decode(self.encoding, 'replace'))
except UnicodeDecodeError:
log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding,
exc_info=True)
# END handle author's encoding
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()
try:
self.message = self.message.decode(self.encoding, 'replace')
except UnicodeDecodeError:
log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True)
# END exception handling
return self
#} END serializable implementation