Skip to content
This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

Commit

Permalink
Merge pull request #1128 from filipnavara/commitgraph-fmt
Browse files Browse the repository at this point in the history
plumbing: format/commitgraph, add APIs for reading and writing commit-graph files
  • Loading branch information
mcuadros authored Apr 24, 2019
2 parents 44a20de + ab5b89c commit 4a62292
Show file tree
Hide file tree
Showing 5 changed files with 689 additions and 0 deletions.
35 changes: 35 additions & 0 deletions plumbing/format/commitgraph/commitgraph.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package commitgraph

import (
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
)

// Node is a reduced representation of a Commit as presented in the
// commit-graph file. It carries only the fields declared below and is merely
// useful as an optimization for walking commit graphs.
type Node struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
// ParentIndexes are the indexes of the parent commits of the commit, i.e.
// values that can be handed to Index.GetNodeByIndex.
ParentIndexes []int
// ParentHashes are the hashes of the parent commits of the commit.
ParentHashes []plumbing.Hash
// Generation is the pre-computed generation number of the commit in the
// commit graph, or zero if not available.
Generation int
// When is the timestamp of the commit.
When time.Time
}

// Index is a representation of a commit graph that allows indexed access to
// its nodes using the commit object hash.
type Index interface {
// GetIndexByHash returns the index in the commit graph of the commit with
// hash h, if available.
GetIndexByHash(h plumbing.Hash) (int, error)
// GetNodeByIndex returns the commit node stored at index i of the commit
// graph, if available. The index is one returned by GetIndexByHash or one
// obtained from a child node.
GetNodeByIndex(i int) (*Node, error)
// Hashes returns all the hashes that are available in the index.
Hashes() []plumbing.Hash
}
135 changes: 135 additions & 0 deletions plumbing/format/commitgraph/commitgraph_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package commitgraph_test

import (
"io/ioutil"
"os"
"path"
"testing"

"golang.org/x/exp/mmap"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
)

// Test is the "go test" entry point; it hands control to the gocheck runner.
func Test(t *testing.T) {
	TestingT(t)
}

// CommitgraphSuite holds the commit-graph tests. It embeds fixtures.Suite from
// the go-git-fixtures package.
type CommitgraphSuite struct {
fixtures.Suite
}

// Register the suite with gocheck.
var _ = Suite(&CommitgraphSuite{})

// testDecodeHelper opens the commit-graph file at path and verifies the parent
// lists of a root commit, a regular commit, a merge commit and an octopus
// merge commit, then checks the size and the first and last entries of the
// hash list.
func testDecodeHelper(c *C, path string) {
	reader, err := mmap.Open(path)
	c.Assert(err, IsNil)
	defer reader.Close()
	index, err := commitgraph.OpenFileIndex(reader)
	c.Assert(err, IsNil)

	expectations := []struct {
		commit  string
		parents []string
	}{
		{
			// Root commit
			commit: "347c91919944a68e9413581a1bc15519550a3afe",
		},
		{
			// Regular commit
			commit: "e713b52d7e13807e87a002e812041f248db3f643",
			parents: []string{
				"347c91919944a68e9413581a1bc15519550a3afe",
			},
		},
		{
			// Merge commit
			commit: "b29328491a0682c259bcce28741eac71f3499f7d",
			parents: []string{
				"e713b52d7e13807e87a002e812041f248db3f643",
				"03d2c021ff68954cf3ef0a36825e194a4b98f981",
			},
		},
		{
			// Octopus merge commit
			commit: "6f6c5d2be7852c782be1dd13e36496dd7ad39560",
			parents: []string{
				"ce275064ad67d51e99f026084e20827901a8361c",
				"bb13916df33ed23004c3ce9ed3b8487528e655c1",
				"a45273fe2d63300e1962a9e26a6b15c276cd7082",
			},
		},
	}

	for _, want := range expectations {
		position, err := index.GetIndexByHash(plumbing.NewHash(want.commit))
		c.Assert(err, IsNil)
		node, err := index.GetNodeByIndex(position)
		c.Assert(err, IsNil)
		c.Assert(len(node.ParentIndexes), Equals, len(want.parents))
		c.Assert(len(node.ParentHashes), Equals, len(want.parents))
		for i, parent := range want.parents {
			c.Assert(node.ParentHashes[i].String(), Equals, parent)
		}
	}

	// Check all hashes
	all := index.Hashes()
	c.Assert(len(all), Equals, 11)
	c.Assert(all[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
	c.Assert(all[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
}

// TestDecode runs testDecodeHelper against the commit-graph file of every
// fixture tagged "commit-graph".
func (s *CommitgraphSuite) TestDecode(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		graphPath := path.Join(f.DotGit().Root(), "objects", "info", "commit-graph")
		testDecodeHelper(c, graphPath)
	})
}

// TestReencode decodes the commit-graph file of every fixture tagged
// "commit-graph", re-encodes the resulting index into a temporary file and
// checks that the re-encoded file decodes to the same expected contents.
func (s *CommitgraphSuite) TestReencode(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		dotgit := f.DotGit()

		reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
		c.Assert(err, IsNil)
		defer reader.Close()
		index, err := commitgraph.OpenFileIndex(reader)
		c.Assert(err, IsNil)

		writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
		c.Assert(err, IsNil)
		tmpName := writer.Name()
		defer os.Remove(tmpName)

		// Close the file before asserting so the handle is released even when
		// encoding fails, and so that a failing Close is reported instead of
		// being silently dropped.
		encodeErr := commitgraph.NewEncoder(writer).Encode(index)
		closeErr := writer.Close()
		c.Assert(encodeErr, IsNil)
		c.Assert(closeErr, IsNil)

		testDecodeHelper(c, tmpName)
	})
}

// TestReencodeInMemory copies every node of the fixture's commit-graph file
// into a memory index, closes the file, encodes the memory index into a
// temporary file and checks that the result decodes to the expected contents.
func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
		dotgit := f.DotGit()

		memoryIndex := commitgraph.NewMemoryIndex()
		// Populate the memory index inside its own function so that the
		// reader is closed before encoding starts (as the test intends) and
		// is also released when one of the assertions aborts the test.
		func() {
			reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
			c.Assert(err, IsNil)
			defer reader.Close()
			index, err := commitgraph.OpenFileIndex(reader)
			c.Assert(err, IsNil)
			for i, hash := range index.Hashes() {
				node, err := index.GetNodeByIndex(i)
				c.Assert(err, IsNil)
				err = memoryIndex.Add(hash, node)
				c.Assert(err, IsNil)
			}
		}()

		writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
		c.Assert(err, IsNil)
		tmpName := writer.Name()
		defer os.Remove(tmpName)

		// Close the file before asserting so the handle is released even when
		// encoding fails, and so that a failing Close is reported instead of
		// being silently dropped.
		encodeErr := commitgraph.NewEncoder(writer).Encode(memoryIndex)
		closeErr := writer.Close()
		c.Assert(encodeErr, IsNil)
		c.Assert(closeErr, IsNil)

		testDecodeHelper(c, tmpName)
	})
}
189 changes: 189 additions & 0 deletions plumbing/format/commitgraph/encoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package commitgraph

import (
"crypto/sha1"
"hash"
"io"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)

// Encoder writes the contents of an Index to an output stream.
type Encoder struct {
// Writer is the destination of everything the Encoder writes.
io.Writer
// hash supplies the checksum that encodeChecksum writes last.
hash hash.Hash
}

// NewEncoder returns a new stream encoder that writes to w. Everything written
// through the encoder is also fed into a SHA-1 hash.
func NewEncoder(w io.Writer) *Encoder {
	digest := sha1.New()
	return &Encoder{
		Writer: io.MultiWriter(w, digest),
		hash:   digest,
	}
}

// Encode writes idx to the underlying writer. It emits, in order, the file
// header, the chunk headers, the OID fanout, the OID lookup, the commit data,
// the large edge list and finally the checksum. The first error encountered
// is returned and nothing further is written.
func (e *Encoder) Encode(idx Index) error {
	// Get all the hashes in the input index
	hashes := idx.Hashes()

	// Sort the input and prepare helper structures we'll need for encoding
	hashToIndex, fanout, largeEdgesCount := e.prepare(idx, hashes)

	// Per-chunk sizes: 256 fanout entries of 4 bytes, 20 bytes per hash and
	// 36 bytes of commit data per hash.
	chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
	chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
	if largeEdgesCount > 0 {
		chunkSignatures = append(chunkSignatures, largeEdgeListSignature)
		chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4)
	}

	if err := e.encodeFileHeader(len(chunkSignatures)); err != nil {
		return err
	}
	if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
		return err
	}
	if err := e.encodeFanout(fanout); err != nil {
		return err
	}
	if err := e.encodeOidLookup(hashes); err != nil {
		return err
	}

	// The error must be checked in this scope: declaring it inside an
	// if-initializer would hide a commit data failure from the caller and a
	// checksum would then be written over truncated data.
	largeEdges, err := e.encodeCommitData(hashes, hashToIndex, idx)
	if err != nil {
		return err
	}
	if err := e.encodeLargeEdges(largeEdges); err != nil {
		return err
	}
	return e.encodeChecksum()
}

// prepare sorts hashes in place and derives the helper structures needed for
// encoding:
//
//   - hashToIndex maps every hash to its position in the sorted slice,
//   - fanout holds 256 cumulative counts, where entry b is the number of
//     hashes whose first byte is less than or equal to b,
//   - largeEdgesCount is the total number of large edge list entries, i.e. the
//     sum of len(ParentHashes)-1 over all nodes with more than two parents.
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, largeEdgesCount uint32) {
	// Sort the hashes and build our index
	plumbing.HashesSort(hashes)
	hashToIndex = make(map[plumbing.Hash]uint32, len(hashes))
	fanout = make([]uint32, 256)
	for i, h := range hashes {
		hashToIndex[h] = uint32(i)
		fanout[h[0]]++
	}

	// Convert the fanout to cumulative values
	for i := 1; i <= 0xff; i++ {
		fanout[i] += fanout[i-1]
	}

	// Count the large edge list entries. Every node with more than two
	// parents contributes, so the scan must not stop at the first one;
	// otherwise the chunk size written to the header is too small.
	for i := range hashes {
		node, err := idx.GetNodeByIndex(i)
		if err != nil || node == nil {
			// prepare has no error result; a node that cannot be loaded
			// contributes nothing to the count instead of causing a nil
			// dereference.
			continue
		}
		if parents := len(node.ParentHashes); parents > 2 {
			largeEdgesCount += uint32(parents - 1)
		}
	}

	return hashToIndex, fanout, largeEdgesCount
}

// encodeFileHeader writes the file signature followed by four bytes: 1, 1,
// chunkCount truncated to a single byte, and 0.
func (e *Encoder) encodeFileHeader(chunkCount int) error {
	if _, err := e.Write(commitFileSignature); err != nil {
		return err
	}
	trailer := []byte{1, 1, byte(chunkCount), 0}
	_, err := e.Write(trailer)
	return err
}

// encodeChunkHeaders writes one table entry per chunk, each consisting of the
// chunk signature and the 64-bit offset at which the chunk starts, followed by
// a terminating entry that carries lastSignature and the offset just past the
// final chunk. chunkSizes[i] is the size in bytes of the chunk identified by
// chunkSignatures[i].
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) error {
	writeEntry := func(signature []byte, offset uint64) error {
		if _, err := e.Write(signature); err != nil {
			return err
		}
		return binary.WriteUint64(e, offset)
	}

	const (
		fileHeaderSize = 8  // bytes of file header preceding the table
		entrySize      = 12 // bytes per table entry, terminator included
	)

	// The first chunk starts right after the file header and the complete
	// table, which has one entry per chunk plus the terminator.
	next := uint64(fileHeaderSize + (len(chunkSignatures)+1)*entrySize)
	for i := range chunkSignatures {
		if err := writeEntry(chunkSignatures[i], next); err != nil {
			return err
		}
		next += chunkSizes[i]
	}
	return writeEntry(lastSignature, next)
}

// encodeFanout writes the first 256 entries of fanout as 32-bit values, in
// order, stopping at the first write error.
func (e *Encoder) encodeFanout(fanout []uint32) error {
	const fanoutEntries = 0x100
	for i := 0; i < fanoutEntries; i++ {
		if err := binary.WriteUint32(e, fanout[i]); err != nil {
			return err
		}
	}
	return nil
}

// encodeOidLookup writes the raw bytes of every hash in hashes, in slice
// order, stopping at the first write error.
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) error {
	for i := range hashes {
		if _, err := e.Write(hashes[i][:]); err != nil {
			return err
		}
	}
	return nil
}

// encodeCommitData writes one commit data record per entry of hashes: the root
// tree hash, two 32-bit parent slots and a 64-bit value that packs the
// generation number above the 34-bit commit time.
//
// Parent positions are looked up in hashToIndex. A commit with more than two
// parents stores its first parent directly and points its second slot at the
// large edge list; the remaining parents are appended to that list, with the
// last one flagged by parentLast. The accumulated list is returned so that the
// caller can write it after the commit data.
//
// A failure to look up a commit in idx, or to write, aborts the encoding and
// is returned with a nil edge list.
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) ([]uint32, error) {
	// The commit time occupies the low 34 bits of the packed value.
	const commitTimeMask = uint64(1)<<34 - 1

	var largeEdges []uint32
	for _, h := range hashes {
		origIndex, err := idx.GetIndexByHash(h)
		if err != nil {
			return nil, err
		}
		commitData, err := idx.GetNodeByIndex(origIndex)
		if err != nil {
			return nil, err
		}
		if _, err = e.Write(commitData.TreeHash[:]); err != nil {
			return nil, err
		}

		parents := commitData.ParentHashes
		var parent1, parent2 uint32 = parentNone, parentNone
		switch n := len(parents); {
		case n == 1:
			parent1 = hashToIndex[parents[0]]
		case n == 2:
			parent1 = hashToIndex[parents[0]]
			parent2 = hashToIndex[parents[1]]
		case n > 2:
			parent1 = hashToIndex[parents[0]]
			// The second slot holds the position in the large edge list where
			// this commit's remaining parents start.
			parent2 = uint32(len(largeEdges)) | parentOctopusUsed
			for _, parentHash := range parents[1:] {
				largeEdges = append(largeEdges, hashToIndex[parentHash])
			}
			largeEdges[len(largeEdges)-1] |= parentLast
		}

		if err = binary.WriteUint32(e, parent1); err != nil {
			return nil, err
		}
		if err = binary.WriteUint32(e, parent2); err != nil {
			return nil, err
		}

		// Mask the time so that a negative or oversized Unix timestamp (for
		// example the zero time.Time) cannot spill into the generation bits.
		packed := uint64(commitData.When.Unix()) & commitTimeMask
		packed |= uint64(commitData.Generation) << 34
		if err = binary.WriteUint64(e, packed); err != nil {
			return nil, err
		}
	}
	return largeEdges, nil
}

// encodeLargeEdges writes every entry of largeEdges as a 32-bit value, in
// slice order, stopping at the first write error.
func (e *Encoder) encodeLargeEdges(largeEdges []uint32) error {
	for i := range largeEdges {
		if err := binary.WriteUint32(e, largeEdges[i]); err != nil {
			return err
		}
	}
	return nil
}

// encodeChecksum writes the first 20 bytes of the current digest of e.hash.
func (e *Encoder) encodeChecksum() error {
	const checksumSize = 20
	digest := e.hash.Sum(nil)
	_, err := e.Write(digest[:checksumSize])
	return err
}
Loading

0 comments on commit 4a62292

Please sign in to comment.