Skip to content

Commit

Permalink
lfs: teach gitscanner how to emit lockable files that aren't lfs objects
Browse files Browse the repository at this point in the history
  • Loading branch information
technoweenie committed Feb 16, 2017
1 parent 7cf53a7 commit 08c5ae6
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 69 deletions.
23 changes: 15 additions & 8 deletions lfs/gitscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,24 @@ func IsCallbackMissing(err error) bool {

// GitScanner scans objects in a Git repository for LFS pointers.
type GitScanner struct {
Filter *filepathfilter.Filter
FoundPointer GitScannerFoundPointer
remote string
skippedRefs []string
Filter *filepathfilter.Filter
FoundPointer GitScannerFoundPointer
FoundLockable GitScannerFoundLockable
PotentialLockables GitScannerSet
remote string
skippedRefs []string

closed bool
started time.Time
mu sync.Mutex
}

// GitScannerFoundPointer is the callback type accepted by the GitScanner scan
// methods (ScanLeftToRemote, ScanRefRange, ScanRef, ScanAll) and stored in
// GitScanner.FoundPointer. It receives a *WrappedPointer and an error;
// presumably only one of the two is meaningful per call — confirm against the
// code that invokes it.
type GitScannerFoundPointer func(*WrappedPointer, error)
// GitScannerFoundLockable is the callback type stored in
// GitScanner.FoundLockable. It receives a single filename; presumably the name
// of a lockable file that is not an LFS object — confirm against the caller.
type GitScannerFoundLockable func(filename string)

// GitScannerSet exposes a membership test over strings. It is the type of
// GitScanner.PotentialLockables; presumably the strings are names of files
// that may be lockable — confirm against the implementation.
type GitScannerSet interface {
// Contains is expected to report whether the given string is in the set.
Contains(string) bool
}

// NewGitScanner initializes a *GitScanner for a Git repository in the current
// working directory.
Expand Down Expand Up @@ -82,7 +89,7 @@ func (s *GitScanner) ScanLeftToRemote(left string, cb GitScannerFoundPointer) er
}
s.mu.Unlock()

return scanRefsToChan(callback, left, "", s.opts(ScanLeftToRemoteMode))
return scanRefsToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode))
}

// ScanRefRange scans through all commits from the given left and right refs,
Expand All @@ -95,7 +102,7 @@ func (s *GitScanner) ScanRefRange(left, right string, cb GitScannerFoundPointer)

opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(callback, left, right, opts)
return scanRefsToChan(s, callback, left, right, opts)
}

// ScanRefWithDeleted scans through all objects in the given ref, including
Expand All @@ -114,7 +121,7 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {

opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
return scanRefsToChan(callback, ref, "", opts)
return scanRefsToChan(s, callback, ref, "", opts)
}

// ScanAll scans through all objects in the git repository.
Expand All @@ -126,7 +133,7 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {

opts := s.opts(ScanAllMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(callback, "", "", opts)
return scanRefsToChan(s, callback, "", "", opts)
}

// ScanTree takes a ref and returns WrappedPointer objects in the tree at that
Expand Down
42 changes: 27 additions & 15 deletions lfs/gitscanner_catfilebatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ import (
// runCatFileBatch uses 'git cat-file --batch' to get the object contents of a
// git object, given its sha1. The contents will be decoded into a Git LFS
// pointer. Git Blob SHA1s are read from the sha1Ch channel and fed to STDIN.
// Results are parsed from STDOUT, and any elegible LFS pointers are sent to
// pointerCh. Any errors are sent to errCh. An error is returned if the 'git
// cat-file' command fails to start.
func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper, errCh chan error) error {
// Results are parsed from STDOUT, and any eligible LFS pointers are sent to
// pointerCh. If a Git Blob is not an LFS pointer, check the lockableSet to see
// if that blob is for a locked file. Any errors are sent to errCh. An error is
// returned if the 'git cat-file' command fails to start.
func runCatFileBatch(pointerCh chan *WrappedPointer, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error) error {
cmd, err := startCommand("git", "cat-file", "--batch")
if err != nil {
return err
Expand All @@ -28,12 +29,15 @@ func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper,
for r := range revs.Results {
cmd.Stdin.Write([]byte(r + "\n"))
canScan := scanner.Scan()
if p := scanner.Pointer(); p != nil {
pointerCh <- p
}

if err := scanner.Err(); err != nil {
errCh <- err
} else if p := scanner.Pointer(); p != nil {
pointerCh <- p
} else if b := scanner.BlobSHA(); len(b) == 40 {
if name, ok := lockableSet.Check(b); ok {
lockableCh <- name
}
}

if !canScan {
Expand All @@ -55,17 +59,23 @@ func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper,

close(pointerCh)
close(errCh)
close(lockableCh)
}()

return nil
}

// catFileBatchScanner reads output in the format produced by
// 'git cat-file --batch' from r, one object per call to Scan. Each Scan
// records the object's blob SHA and, when the object's contents decode as an
// LFS pointer, the resulting *WrappedPointer.
type catFileBatchScanner struct {
// r is the buffered reader that header lines and object contents are read
// from.
r *bufio.Reader
// blobSha is the first field of the header line read by the most recent
// Scan, or "" if no valid header line was read.
blobSha string
// pointer is the pointer decoded by the most recent Scan, or nil.
pointer *WrappedPointer
// err is reset to nil at the start of every Scan.
// NOTE(review): where err is assigned is not visible in this view.
err error
}

// BlobSHA returns the blob SHA recorded by the most recent call to Scan. It is
// the empty string when Scan could not read a valid header line.
func (s *catFileBatchScanner) BlobSHA() string {
return s.blobSha
}

// Pointer returns the LFS pointer decoded by the most recent call to Scan, or
// nil if no pointer was decoded (a read failed, or the object's contents are
// not an LFS pointer).
func (s *catFileBatchScanner) Pointer() *WrappedPointer {
return s.pointer
}
Expand All @@ -76,7 +86,8 @@ func (s *catFileBatchScanner) Err() error {

func (s *catFileBatchScanner) Scan() bool {
s.pointer, s.err = nil, nil
p, err := s.next()
b, p, err := s.next()
s.blobSha = b
s.pointer = p

if err != nil {
Expand All @@ -89,39 +100,40 @@ func (s *catFileBatchScanner) Scan() bool {
return true
}

func (s *catFileBatchScanner) next() (*WrappedPointer, error) {
func (s *catFileBatchScanner) next() (string, *WrappedPointer, error) {
l, err := s.r.ReadBytes('\n')
if err != nil {
return nil, err
return "", nil, err
}

// Line is formatted:
// <sha1> <type> <size>
fields := bytes.Fields(l)
if len(fields) < 3 {
return nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch")
return "", nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch")
}

blobSha := string(fields[0])
size, _ := strconv.Atoi(string(fields[2]))
buf := make([]byte, size)
read, err := io.ReadFull(s.r, buf)
if err != nil {
return nil, err
return blobSha, nil, err
}

if size != read {
return nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
return blobSha, nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
}

p, err := DecodePointer(bytes.NewBuffer(buf[:read]))
var pointer *WrappedPointer
if err == nil {
pointer = &WrappedPointer{
Sha1: string(fields[0]),
Sha1: blobSha,
Pointer: p,
}
}

_, err = s.r.ReadBytes('\n') // Extra \n inserted by cat-file
return pointer, err
return blobSha, pointer, err
}
52 changes: 30 additions & 22 deletions lfs/gitscanner_catfilebatchcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ import (

// runCatFileBatchCheck uses 'git cat-file --batch-check' to get the type and
// size of a git object. Any object that isn't of type blob and under the
// blobSizeCutoff will be ignored. revs is a channel over which strings
// containing git sha1s will be sent. It returns a channel from which sha1
// strings can be read.
func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, errCh chan error) error {
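// blobSizeCutoff will be ignored, unless it's a locked file. revs is a channel
// over which strings containing git sha1s will be sent. The sha1s of small
// blobs are sent to smallRevCh, and the names of lockable files are sent to
// lockableCh. Any errors are sent to errCh. An error is returned if the 'git
// cat-file' command fails to start.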
func runCatFileBatchCheck(smallRevCh chan string, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error) error {
cmd, err := startCommand("git", "cat-file", "--batch-check")
if err != nil {
return err
Expand All @@ -23,12 +23,14 @@ func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, er
for r := range revs.Results {
cmd.Stdin.Write([]byte(r + "\n"))
hasNext := scanner.Scan()
if b := scanner.BlobOID(); len(b) > 0 {
smallRevCh <- b
}

if err := scanner.Err(); err != nil {
errCh <- err
} else if b := scanner.LFSBlobOID(); len(b) > 0 {
smallRevCh <- b
} else if b := scanner.GitBlobOID(); len(b) > 0 {
if name, ok := lockableSet.Check(b); ok {
lockableCh <- name
}
}

if !hasNext {
Expand All @@ -54,27 +56,32 @@ func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, er
}

type catFileBatchCheckScanner struct {
s *bufio.Scanner
limit int
blobOID string
s *bufio.Scanner
limit int
lfsBlobOID string
gitBlobOID string
}

// LFSBlobOID returns the OID recorded by the most recent call to Scan when the
// scanned line described a blob whose size is below the scanner's limit. It is
// the empty string otherwise.
func (s *catFileBatchCheckScanner) LFSBlobOID() string {
return s.lfsBlobOID
}

func (s *catFileBatchCheckScanner) BlobOID() string {
return s.blobOID
func (s *catFileBatchCheckScanner) GitBlobOID() string {
return s.gitBlobOID
}

// Err returns the error, if any, reported by the underlying bufio.Scanner.
func (s *catFileBatchCheckScanner) Err() error {
return s.s.Err()
}

func (s *catFileBatchCheckScanner) Scan() bool {
s.blobOID = ""
b, hasNext := s.next()
s.blobOID = b
lfsBlobSha, gitBlobSha, hasNext := s.next()
s.lfsBlobOID = lfsBlobSha
s.gitBlobOID = gitBlobSha
return hasNext
}

func (s *catFileBatchCheckScanner) next() (string, bool) {
func (s *catFileBatchCheckScanner) next() (string, string, bool) {
hasNext := s.s.Scan()
line := s.s.Text()
lineLen := len(line)
Expand All @@ -84,21 +91,22 @@ func (s *catFileBatchCheckScanner) next() (string, bool) {
// type is at a fixed spot, if we see that it's "blob", we can avoid
// splitting the line just to get the size.
if lineLen < 46 {
return "", hasNext
return "", "", hasNext
}

if line[41:45] != "blob" {
return "", hasNext
return "", "", hasNext
}

size, err := strconv.Atoi(line[46:lineLen])
if err != nil {
return "", hasNext
return "", "", hasNext
}

blobSha := line[0:40]
if size >= s.limit {
return "", hasNext
return "", blobSha, hasNext
}

return line[0:40], hasNext
return blobSha, "", hasNext
}
20 changes: 11 additions & 9 deletions lfs/gitscanner_catfilebatchcheckscanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ func TestCatFileBatchCheckScannerWithValidOutput(t *testing.T) {
limit: 1024,
}

assertNextOID(t, s, "")
assertNextOID(t, s, "")
assertNextOID(t, s, "")
assertNextOID(t, s, "0000000000000000000000000000000000000002")
assertNextOID(t, s, "")
assertNextOID(t, s, "")
assertNextOID(t, s, "", "")
assertNextOID(t, s, "", "")
assertNextOID(t, s, "", "")
assertNextOID(t, s, "0000000000000000000000000000000000000002", "")
assertNextOID(t, s, "", "")
assertNextOID(t, s, "", "0000000000000000000000000000000000000004")
assertScannerDone(t, s)
assert.Equal(t, "", s.BlobOID())
assert.Equal(t, "", s.LFSBlobOID())
assert.Equal(t, "", s.GitBlobOID())
}

type stringScanner interface {
Expand All @@ -49,9 +50,10 @@ func assertNextScan(t *testing.T, scanner genericScanner) {
assert.Nil(t, scanner.Err())
}

func assertNextOID(t *testing.T, scanner *catFileBatchCheckScanner, oid string) {
func assertNextOID(t *testing.T, scanner *catFileBatchCheckScanner, lfsBlobOID, gitBlobOID string) {
assertNextScan(t, scanner)
assert.Equal(t, oid, scanner.BlobOID())
assert.Equal(t, lfsBlobOID, scanner.LFSBlobOID())
assert.Equal(t, gitBlobOID, scanner.GitBlobOID())
}

func assertScannerDone(t *testing.T, scanner genericScanner) {
Expand Down
4 changes: 2 additions & 2 deletions lfs/gitscanner_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ func scanIndex(cb GitScannerFoundPointer, ref string) error {
close(allRevsErr)
}()

smallShas, err := catFileBatchCheck(allRevs)
smallShas, _, err := catFileBatchCheck(allRevs, nil)
if err != nil {
return err
}

ch := make(chan gitscannerResult, chanBufSize)

barePointerCh, err := catFileBatch(smallShas)
barePointerCh, _, err := catFileBatch(smallShas, nil)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit 08c5ae6

Please sign in to comment.