Skip to content

Commit

Permalink
commit: redesigned revlist and commit parsing, commits are always retrieved from their object information directly. This is faster, and resolves issues with the rev-list format and empty commit messages.
Browse files (browse the repository at this point in the history)

Adjusted many tests to match the changes, as they were still mocked. Where necessary, the mock was removed and replaced by code that actually executes.
  • Loading branch information
Byron committed Jun 2, 2010
1 parent 4e1c89e commit ae5a69f
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 265 deletions.
98 changes: 40 additions & 58 deletions lib/git/objects/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,12 @@ def _get_intermediate_items(cls, commit):
return commit.parents

def _set_cache_(self, attr):
"""
Called by LazyMixin superclass when the given uninitialized member needs
""" Called by LazyMixin superclass when the given uninitialized member needs
to be set.
We set all values at once.
"""
We set all values at once. """
if attr in Commit.__slots__:
# read the data in a chunk, its faster - then provide a file wrapper
# Could use self.data, but lets try to get it with less calls
hexsha, typename, size, data = self.repo.git.get_object_data(self)
self._deserialize(StringIO(data))
else:
Expand Down Expand Up @@ -181,16 +180,16 @@ def iter_items(cls, repo, rev, paths='', **kwargs):
Returns
iterator yielding Commit items
"""
options = {'pretty': 'raw', 'as_process' : True }
options.update(kwargs)

if 'pretty' in kwargs:
raise ValueError("--pretty cannot be used as parsing expects single sha's only")
# END handle pretty
args = list()
if paths:
args.extend(('--', paths))
# END if paths

proc = repo.git.rev_list(rev, args, **options)
return cls._iter_from_process_or_stream(repo, proc, True)
proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
return cls._iter_from_process_or_stream(repo, proc)

def iter_parents(self, paths='', **kwargs):
"""
Expand Down Expand Up @@ -235,35 +234,30 @@ def stats(self):
return stats.Stats._list_from_string(self.repo, text)

@classmethod
def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
"""
Parse out commit information into a list of Commit objects
``repo``
is the Repo
``proc``
git-rev-list process instance (raw format)
def _iter_from_process_or_stream(cls, repo, proc_or_stream):
"""Parse out commit information into a list of Commit objects
We expect one-line per commit, and parse the actual commit information directly
from our lighting fast object database
``from_rev_list``
If True, the stream was created by rev-list in which case we parse
the message differently
Returns
iterator returning Commit objects
"""
:param proc: git-rev-list process instance - one sha per line
:return: iterator returning Commit objects"""
stream = proc_or_stream
if not hasattr(stream,'readline'):
stream = proc_or_stream.stdout

readline = stream.readline
while True:
line = stream.readline()
line = readline()
if not line:
break
commit_tokens = line.split()
id = commit_tokens[1]
assert commit_tokens[0] == "commit"
sha = line.strip()
if len(sha) > 40:
# split additional information, as returned by bisect for instance
sha, rest = line.split(None, 1)
# END handle extra info

yield Commit(repo, id)._deserialize(stream, from_rev_list)
assert len(sha) == 40, "Invalid line: %s" % sha
yield Commit(repo, sha)
# END for each line in stream


Expand Down Expand Up @@ -386,15 +380,16 @@ def _serialize(self, stream):
# for now, this is very inefficient and in fact shouldn't be used like this
return super(Commit, self)._serialize(stream)

def _deserialize(self, stream, from_rev_list=False):
def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
self.tree = Tree(self.repo, stream.readline().split()[1], 0, '')
readline = stream.readline
self.tree = Tree(self.repo, readline().split()[1], 0, '')

self.parents = list()
next_line = None
while True:
parent_line = stream.readline()
parent_line = readline()
if not parent_line.startswith('parent'):
next_line = parent_line
break
Expand All @@ -404,37 +399,24 @@ def _deserialize(self, stream, from_rev_list=False):
self.parents = tuple(self.parents)

self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline())
self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())


# empty line
# now we can have the encoding line, or an empty line followed by the optional
# message.
self.encoding = self.default_encoding
enc = stream.readline()
enc.strip()
# read encoding or empty line to separate message
enc = readline()
enc = enc.strip()
if enc:
self.encoding = enc[enc.find(' ')+1:]
# END parse encoding

message_lines = list()
if from_rev_list:
while True:
msg_line = stream.readline()
if not msg_line.startswith(' '):
# and forget about this empty marker
# cut the last newline to get rid of the artificial newline added
# by rev-list command. Lets hope its just linux style \n
message_lines[-1] = message_lines[-1][:-1]
break
# END abort message reading
# strip leading 4 spaces
message_lines.append(msg_line[4:])
# END while there are message lines
self.message = ''.join(message_lines)
else:
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()[:-1]
# END message parsing
# now comes the message separator
readline()
# END handle encoding

# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()[:-1]
return self

#} END serializable implementation
27 changes: 3 additions & 24 deletions test/fixtures/rev_list
Original file line number Diff line number Diff line change
@@ -1,24 +1,3 @@
commit 4c8124ffcf4039d292442eeccabdeca5af5c5017
tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4
parent 634396b2f541a9f2d58b00be1a07f0c358b999b3
author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
committer Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700

implement Grit#heads

commit 634396b2f541a9f2d58b00be1a07f0c358b999b3
tree b35b4bf642d667fdd613eebcfe4e17efd420fb8a
author Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700
committer Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700

initial grit setup

commit ab25fd8483882c3bda8a458ad2965d2248654335
tree c20b5ec543bde1e43a931449b196052c06ed8acc
parent 6e64c55896aabb9a7d8e9f8f296f426d21a78c2c
parent 7f874954efb9ba35210445be456c74e037ba6af2
author Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700
committer Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700

Merge branch 'site'
Some other stuff
4c8124ffcf4039d292442eeccabdeca5af5c5017
634396b2f541a9f2d58b00be1a07f0c358b999b3
ab25fd8483882c3bda8a458ad2965d2248654335
8 changes: 4 additions & 4 deletions test/git/performance/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php

from test.testlib import *
from lib import *
from git import *
from time import time
import sys

class TestPerformance(TestBase):
class TestPerformance(TestBigRepoReadOnly):

# ref with about 100 commits in its history
ref_100 = '0.1.6'
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_commit_traversal(self):
# bound to cat-file parsing performance
nc = 0
st = time()
for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False):
for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
Expand All @@ -59,7 +59,7 @@ def test_commit_iteration(self):
# bound to stream parsing performance
nc = 0
st = time()
for c in Commit.iter_items(self.rorepo, self.ref_100):
for c in Commit.iter_items(self.gitrepo, self.head_sha_2k):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
Expand Down
Loading

0 comments on commit ae5a69f

Please sign in to comment.