Skip to content

Commit

Permalink
On open repository open common cat file batch and batch-check (#15667)
Browse files Browse the repository at this point in the history
Use common git cat-file --batch and git cat-file --batch-check to
significantly reduce calls to git.
    
Signed-off-by: Andrew Thornton <art27@cantab.net>
  • Loading branch information
zeripath authored May 10, 2021
1 parent 038e1db commit 270aab4
Show file tree
Hide file tree
Showing 26 changed files with 463 additions and 161 deletions.
8 changes: 7 additions & 1 deletion modules/context/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -905,12 +905,18 @@ func (ctx *Context) IssueTemplatesFromDefaultBranch() []api.IssueTemplate {
log.Debug("DataAsync: %v", err)
continue
}
defer r.Close()
closed := false
defer func() {
if !closed {
_ = r.Close()
}
}()
data, err := ioutil.ReadAll(r)
if err != nil {
log.Debug("ReadAll: %v", err)
continue
}
_ = r.Close()
var it api.IssueTemplate
content, err := markdown.ExtractMetadata(string(data), &it)
if err != nil {
Expand Down
55 changes: 46 additions & 9 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,44 @@ import (
"strings"
)

// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
type WriteCloserError interface {
io.WriteCloser
CloseWithError(err error) error
}

// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
cancel := func() {
_ = batchStdinReader.Close()
_ = batchStdinWriter.Close()
_ = batchStdoutReader.Close()
_ = batchStdoutWriter.Close()
}

go func() {
stderr := strings.Builder{}
err := NewCommand("cat-file", "--batch-check").RunInDirFullPipeline(repoPath, batchStdoutWriter, &stderr, batchStdinReader)
if err != nil {
_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
} else {
_ = batchStdoutWriter.Close()
_ = batchStdinReader.Close()
}
}()

// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
batchReader := bufio.NewReader(batchStdoutReader)

return batchStdinWriter, batchReader, cancel
}

// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
Expand Down Expand Up @@ -47,26 +82,28 @@ func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
// ReadBatchLine reads the header line from cat-file --batch
// We expect:
// <sha> SP <type> SP <size> LF
// sha is a 40byte not 20byte here
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
sha, err = rd.ReadBytes(' ')
if err != nil {
return
}
sha = sha[:len(sha)-1]

typ, err = rd.ReadString(' ')
typ, err = rd.ReadString('\n')
if err != nil {
return
}
typ = typ[:len(typ)-1]

var sizeStr string
sizeStr, err = rd.ReadString('\n')
if err != nil {
idx := strings.Index(typ, " ")
if idx < 0 {
err = ErrNotExist{ID: string(sha)}
return
}
sizeStr := typ[idx+1 : len(typ)-1]
typ = typ[:idx]

size, err = strconv.ParseInt(sizeStr[:len(sizeStr)-1], 10, 64)
size, err = strconv.ParseInt(sizeStr, 10, 64)
return
}

Expand Down Expand Up @@ -128,7 +165,7 @@ headerLoop:
}

// Discard the rest of the commit
discard := size - n
discard := size - n + 1
for discard > math.MaxInt32 {
_, err := rd.Discard(math.MaxInt32)
if err != nil {
Expand Down
114 changes: 84 additions & 30 deletions modules/git/blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,54 @@ package git

import (
"bufio"
"bytes"
"io"
"strconv"
"strings"
"io/ioutil"
"math"
)

// Blob represents a Git object.
type Blob struct {
ID SHA1

gotSize bool
size int64
repoPath string
name string
gotSize bool
size int64
name string
repo *Repository
}

// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
stdoutReader, stdoutWriter := io.Pipe()
wr, rd, cancel := b.repo.CatFileBatch()

go func() {
stderr := &strings.Builder{}
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(b.repoPath, stdoutWriter, stderr, strings.NewReader(b.ID.String()+"\n"))
if err != nil {
err = ConcatenateError(err, stderr.String())
_ = stdoutWriter.CloseWithError(err)
} else {
_ = stdoutWriter.Close()
}
}()

bufReader := bufio.NewReader(stdoutReader)
_, _, size, err := ReadBatchLine(bufReader)
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
stdoutReader.Close()
cancel()
return nil, err
}
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
return nil, err
}
b.gotSize = true
b.size = size

return &LimitedReaderCloser{
R: bufReader,
C: stdoutReader,
N: size,
if size < 4096 {
bs, err := ioutil.ReadAll(io.LimitReader(rd, size))
if err != nil {
cancel()
return nil, err
}
_, err = rd.Discard(1)
return ioutil.NopCloser(bytes.NewReader(bs)), err
}

return &blobReader{
rd: rd,
n: size,
cancel: cancel,
}, nil
}

Expand All @@ -59,18 +65,66 @@ func (b *Blob) Size() int64 {
return b.size
}

size, err := NewCommand("cat-file", "-s", b.ID.String()).RunInDir(b.repoPath)
wr, rd, cancel := b.repo.CatFileBatchCheck()
defer cancel()
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repoPath, err)
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}

b.size, err = strconv.ParseInt(size[:len(size)-1], 10, 64)
_, _, b.size, err = ReadBatchLine(rd)
if err != nil {
log("error whilst parsing size %s for %s in %s. Error: %v", size, b.ID.String(), b.repoPath, err)
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}

b.gotSize = true

return b.size
}

type blobReader struct {
rd *bufio.Reader
n int64
cancel func()
}

func (b *blobReader) Read(p []byte) (n int, err error) {
if b.n <= 0 {
return 0, io.EOF
}
if int64(len(p)) > b.n {
p = p[0:b.n]
}
n, err = b.rd.Read(p)
b.n -= int64(n)
return
}

// Close implements io.Closer
func (b *blobReader) Close() error {
if b.n > 0 {
for b.n > math.MaxInt32 {
n, err := b.rd.Discard(math.MaxInt32)
b.n -= int64(n)
if err != nil {
b.cancel()
return err
}
b.n -= math.MaxInt32
}
n, err := b.rd.Discard(int(b.n))
b.n -= int64(n)
if err != nil {
b.cancel()
return err
}
}
if b.n == 0 {
_, err := b.rd.Discard(1)
b.n--
b.cancel()
return err
}
return nil
}
5 changes: 3 additions & 2 deletions modules/git/blob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ func TestBlob_Data(t *testing.T) {
r, err := testBlob.DataAsync()
assert.NoError(t, err)
require.NotNil(t, r)
defer r.Close()

data, err := ioutil.ReadAll(r)
assert.NoError(t, r.Close())

assert.NoError(t, err)
assert.Equal(t, output, string(data))
}
Expand All @@ -54,7 +55,7 @@ func Benchmark_Blob_Data(b *testing.B) {
if err != nil {
b.Fatal(err)
}
defer r.Close()
ioutil.ReadAll(r)
_ = r.Close()
}
}
14 changes: 12 additions & 2 deletions modules/git/commit_info_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache *LastCo
}

func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) {
wr, rd, cancel := CatFileBatch(cache.repo.Path)
wr, rd, cancel := cache.repo.CatFileBatch()
defer cancel()

var unHitEntryPaths []string
Expand Down Expand Up @@ -144,7 +144,7 @@ func GetLastCommitForPaths(commit *Commit, treePath string, paths []string) ([]*
}
}()

batchStdinWriter, batchReader, cancel := CatFileBatch(commit.repo.Path)
batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch()
defer cancel()

mapsize := 4096
Expand Down Expand Up @@ -237,6 +237,10 @@ revListLoop:
// FIXME: is there any order to the way strings are emitted from cat-file?
// if there is - then we could skip once we've passed all of our data
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}

break treeReadingLoop
}

Expand Down Expand Up @@ -281,6 +285,9 @@ revListLoop:
return nil, err
}
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}

// if we haven't found a treeID for the target directory our search is over
if len(treeID) == 0 {
Expand Down Expand Up @@ -345,6 +352,9 @@ revListLoop:
if err != nil {
return nil, err
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
commitCommits[i] = c
}

Expand Down
3 changes: 1 addition & 2 deletions modules/git/last_commit_cache_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package git

import (
"bufio"
"io"
"path"
)

Expand Down Expand Up @@ -36,7 +35,7 @@ func NewLastCommitCache(repoPath string, gitRepo *Repository, ttl func() int64,
}

// Get get the last commit information by commit id and entry path
func (c *LastCommitCache) Get(ref, entryPath string, wr *io.PipeWriter, rd *bufio.Reader) (interface{}, error) {
func (c *LastCommitCache) Get(ref, entryPath string, wr WriteCloserError, rd *bufio.Reader) (interface{}, error) {
v := c.cache.Get(c.getCacheKey(c.repoPath, ref, entryPath))
if vs, ok := v.(string); ok {
log("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs)
Expand Down
9 changes: 8 additions & 1 deletion modules/git/notes_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,18 @@ func GetNote(repo *Repository, commitID string, note *Note) error {
if err != nil {
return err
}
defer dataRc.Close()
closed := false
defer func() {
if !closed {
_ = dataRc.Close()
}
}()
d, err := ioutil.ReadAll(dataRc)
if err != nil {
return err
}
_ = dataRc.Close()
closed = true
note.Message = d

treePath := ""
Expand Down
Loading

0 comments on commit 270aab4

Please sign in to comment.