Skip to content

Commit

Permalink
fix race condition when multiple instance of git clone may get launch…
Browse files Browse the repository at this point in the history
…ed at the same time
  • Loading branch information
badjware committed Mar 3, 2022
1 parent 3cffbbf commit d0bc974
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 136 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ To reduce the number of calls to the Gitlab api and improve the responsiveness o
While the filesystem lives in memory, the git repositories that are cloned are saved on disk. By default, they are saved in `$XDG_DATA_HOME/gitlabfs` or `$HOME/.local/share/gitlabfs`, if `$XDG_DATA_HOME` is unset. `gitlabfs` symlink to the local clone of that repo. The local clone is unaffected by project rename or archive/unarchive in Gitlab and a given project will always point to the correct local folder.

## Known issues / Future improvements
* There is a race condition that could happen when interacting with git that needs to be fixed.
* Cloning and pulling repositories is currently very resource-intensive, especially on large set of repositories. Need to track down what causes this to happen. For now, leaving `on_clone` set to `init` and `auto_pull` to `false` in the configuration avoids the issue.
* ~~There is a race condition that could happen when interacting with git that needs to be fixed.~~
* ~~Cloning and pulling repositories is currently very resource-intensive, especially on large set of repositories. Need to track down what causes this to happen. For now, leaving `on_clone` set to `init` and `auto_pull` to `false` in the configuration avoids the issue.~~
* Cache persists forever until a manual refresh is requested. Some way to automatically refresh would be nice.
* The filesystem is currently read-only. Implementing `mkdir` to create groups, `ln` or `touch` to create projects, etc. would be nice.
* Code need some cleanup and could maybe be optimized here and there.
Expand Down
14 changes: 4 additions & 10 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,8 @@ git:
# The depth of the git history to pull. Set to 0 to pull the full history.
depth: 1

# The number of `git clone` operation that can be queued up
clone_queue_size: 200
# The number of git operations that can be queued up
queue_size: 200

# The number of parallel `git clone` operation that is allowed to run at once
clone_worker_count: 5

# The number of `git pull` operation that can be queued up
pull_queue_size: 500

# The number of parallel `git pull` operation that is allowed to run at once
pull_worker_count: 5
# The number of parallel git operations that is allowed to run at once
worker_count: 5
72 changes: 36 additions & 36 deletions git/client.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package git

import (
"errors"
"context"
"net/url"
"os"
"path/filepath"
"strconv"
"time"

"github.com/vmihailenco/taskq/v3"
"github.com/vmihailenco/taskq/v3/memqueue"
)

const (
Expand All @@ -25,34 +29,42 @@ type GitClientParam struct {
PullDepth int
AutoPull bool

CloneBuffSize int
CloneWorkerCount int
PullBuffSize int
PullWorkerCount int
QueueSize int
QueueWorkerCount int
}

type gitClient struct {
GitClientParam
cloneChan chan *gitCloneParam
pullChan chan *gitPullParam
queue taskq.Queue
cloneTask *taskq.Task
pullTask *taskq.Task
}

func NewClient(p GitClientParam) (*gitClient, error) {
queueFactory := memqueue.NewFactory()
// Create the client
c := &gitClient{
GitClientParam: p,
cloneChan: make(chan *gitCloneParam, p.CloneBuffSize),
pullChan: make(chan *gitPullParam, p.PullBuffSize),
}

// Start worker goroutines
for i := 0; i < p.CloneWorkerCount; i++ {
go c.cloneWorker()
}
for i := 0; i < p.PullWorkerCount; i++ {
go c.pullWorker()
queue: queueFactory.RegisterQueue(&taskq.QueueOptions{
Name: "git-queue",
MaxNumWorker: int32(p.QueueWorkerCount),
BufferSize: p.QueueSize,
Storage: taskq.NewLocalStorage(),
}),
}

c.cloneTask = taskq.RegisterTask(&taskq.TaskOptions{
Name: "git-clone",
Handler: c.clone,
RetryLimit: 1,
})
c.pullTask = taskq.RegisterTask(&taskq.TaskOptions{
Name: "git-pull",
Handler: c.pull,
RetryLimit: 1,
})

return c, nil
}

Expand All @@ -62,28 +74,16 @@ func (c *gitClient) getLocalRepoLoc(pid int) string {

func (c *gitClient) CloneOrPull(url string, pid int, defaultBranch string) (localRepoLoc string, err error) {
localRepoLoc = c.getLocalRepoLoc(pid)
// TODO: Better manage concurrency, filter out duplicate requests
if _, err := os.Stat(localRepoLoc); os.IsNotExist(err) {
// Dispatch to clone worker
select {
case c.cloneChan <- &gitCloneParam{
url: url,
defaultBranch: defaultBranch,
dst: localRepoLoc,
}:
default:
return localRepoLoc, errors.New("failed to clone local repo")
}
// Dispatch clone msg
msg := c.cloneTask.WithArgs(context.Background(), url, defaultBranch, localRepoLoc)
msg.OnceInPeriod(time.Second, pid)
c.queue.Add(msg)
} else if c.AutoPull {
// Dispatch to pull worker
select {
case c.pullChan <- &gitPullParam{
repoPath: localRepoLoc,
defaultBranch: defaultBranch,
}:
default:
return localRepoLoc, errors.New("failed to pull local repo")
}
// Dispatch pull msg
msg := c.pullTask.WithArgs(context.Background(), localRepoLoc, defaultBranch)
msg.OnceInPeriod(time.Second, pid)
c.queue.Add(msg)
}
return localRepoLoc, nil
}
57 changes: 19 additions & 38 deletions git/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,65 @@ package git

import (
"fmt"
"os"
"strconv"

"github.com/badjware/gitlabfs/utils"
)

type gitCloneParam struct {
url string
defaultBranch string
dst string
}

func (c *gitClient) cloneWorker() {
fmt.Println("Started git cloner worker routine")

for gcp := range c.cloneChan {
if _, err := os.Stat(gcp.dst); os.IsNotExist(err) {
if err := c.clone(gcp); err != nil {
fmt.Println(err)
}
}
}
}

func (c *gitClient) clone(gcp *gitCloneParam) error {
func (c *gitClient) clone(url string, defaultBranch string, dst string) error {
if c.CloneMethod == CloneInit {
// "Fake" cloning the repo by never actually talking to the git server
// This skip a fetch operation that we would do if we where to do a proper clone
// We can save a lot of time and network i/o doing it this way, at the cost of
// resulting in a very barebone local copy

// Init the local repo
fmt.Printf("Initializing %v into %v\n", gcp.url, gcp.dst)
fmt.Printf("Initializing %v into %v\n", url, dst)
_, err := utils.ExecProcess(
"git", "init",
"--initial-branch", gcp.defaultBranch,
"--initial-branch", defaultBranch,
"--",
gcp.dst, // directory
dst, // directory
)
if err != nil {
return fmt.Errorf("failed to init git repo %v to %v: %v", gcp.url, gcp.dst, err)
return fmt.Errorf("failed to init git repo %v to %v: %v", url, dst, err)
}

// Configure the remote
_, err = utils.ExecProcessInDir(
gcp.dst, // workdir
dst, // workdir
"git", "remote", "add",
"-m", gcp.defaultBranch,
"-m", defaultBranch,
"--",
c.RemoteName, // name
gcp.url, // url
url, // url
)
if err != nil {
return fmt.Errorf("failed to setup remote %v in git repo %v: %v", gcp.url, gcp.dst, err)
return fmt.Errorf("failed to setup remote %v in git repo %v: %v", url, dst, err)
}

// Configure the default branch
_, err = utils.ExecProcessInDir(
gcp.dst, // workdir
dst, // workdir
"git", "config", "--local",
"--",
fmt.Sprintf("branch.%s.remote", gcp.defaultBranch), // key
fmt.Sprintf("branch.%s.remote", defaultBranch), // key
c.RemoteName, // value

)
if err != nil {
return fmt.Errorf("failed to setup default branch remote in git repo %v: %v", gcp.dst, err)
return fmt.Errorf("failed to setup default branch remote in git repo %v: %v", dst, err)
}
_, err = utils.ExecProcessInDir(
gcp.dst, // workdir
dst, // workdir
"git", "config", "--local",
"--",
fmt.Sprintf("branch.%s.merge", gcp.defaultBranch), // key
fmt.Sprintf("refs/heads/%s", gcp.defaultBranch), // value
fmt.Sprintf("branch.%s.merge", defaultBranch), // key
fmt.Sprintf("refs/heads/%s", defaultBranch), // value

)
if err != nil {
return fmt.Errorf("failed to setup default branch merge in git repo %v: %v", gcp.dst, err)
return fmt.Errorf("failed to setup default branch merge in git repo %v: %v", dst, err)
}
} else {
// Clone the repo
Expand All @@ -88,11 +69,11 @@ func (c *gitClient) clone(gcp *gitCloneParam) error {
"--origin", c.RemoteName,
"--depth", strconv.Itoa(c.PullDepth),
"--",
gcp.url, // repository
gcp.dst, // directory
url, // repository
dst, // directory
)
if err != nil {
return fmt.Errorf("failed to clone git repo %v to %v: %v", gcp.url, gcp.dst, err)
return fmt.Errorf("failed to clone git repo %v to %v: %v", url, dst, err)
}
}
return nil
Expand Down
33 changes: 9 additions & 24 deletions git/pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,32 @@ import (
"github.com/badjware/gitlabfs/utils"
)

type gitPullParam struct {
repoPath string
defaultBranch string
}

func (c *gitClient) pullWorker() {
fmt.Println("Started git puller worker routine")

for gpp := range c.pullChan {
if err := c.pull(gpp); err != nil {
fmt.Println(err)
}
}
}

func (c *gitClient) pull(gpp *gitPullParam) error {
func (c *gitClient) pull(repoPath string, defaultBranch string) error {
// Check if the local repo is on default branch
branchName, err := utils.ExecProcessInDir(
gpp.repoPath, // workdir
repoPath, // workdir
"git", "branch",
"--show-current",
)
if err != nil {
return fmt.Errorf("failed to retrieve HEAD of git repo %v: %v", gpp.repoPath, err)
return fmt.Errorf("failed to retrieve HEAD of git repo %v: %v", repoPath, err)
}

if branchName == gpp.defaultBranch {
if branchName == defaultBranch {
// Pull the repo
_, err = utils.ExecProcessInDir(
gpp.repoPath, // workdir
repoPath, // workdir
"git", "pull",
"--depth", strconv.Itoa(c.PullDepth),
"--",
c.RemoteName, // repository
gpp.defaultBranch, // refspec
c.RemoteName, // repository
defaultBranch, // refspec
)
if err != nil {
return fmt.Errorf("failed to pull git repo %v: %v", gpp.repoPath, err)
return fmt.Errorf("failed to pull git repo %v: %v", repoPath, err)
}
} else {
fmt.Printf("%v != %v, skipping pull", branchName, gpp.defaultBranch)
fmt.Printf("%v != %v, skipping pull", branchName, defaultBranch)
}

return nil
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ module github.com/badjware/gitlabfs
go 1.15

require (
github.com/golang/protobuf v1.5.1 // indirect
github.com/bsm/redislock v0.7.2 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/hanwen/go-fuse/v2 v2.1.0
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.6.8 // indirect
github.com/klauspost/compress v1.14.4 // indirect
github.com/vmihailenco/taskq/v3 v3.2.9-0.20211122085105-720ffc56ac4d
github.com/xanzy/go-gitlab v0.47.0
golang.org/x/net v0.0.0-20210323141857-08027d57d8cf // indirect
golang.org/x/oauth2 v0.0.0-20210323180902-22b0adad7558 // indirect
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 // indirect
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect
google.golang.org/appengine v1.6.7 // indirect
gopkg.in/yaml.v2 v2.4.0
Expand Down
Loading

0 comments on commit d0bc974

Please sign in to comment.