Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync branches to DB immediately when handle git hook calling #29493

Merged
merged 12 commits into from
Mar 6, 2024
5 changes: 5 additions & 0 deletions models/git/branch.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ func GetBranch(ctx context.Context, repoID int64, branchName string) (*Branch, e
return &branch, nil
}

func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) {
branches := make([]*Branch, 0, len(branchNames))
return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches)
lunny marked this conversation as resolved.
Show resolved Hide resolved
}

func AddBranches(ctx context.Context, branches []*Branch) error {
for _, branch := range branches {
if _, err := db.GetEngine(ctx).Insert(branch); err != nil {
Expand Down
66 changes: 65 additions & 1 deletion routers/private/hook_post_receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import (
"net/http"
"strconv"

git_model "code.gitea.io/gitea/models/git"
issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/private"
repo_module "code.gitea.io/gitea/modules/repository"
Expand All @@ -27,14 +29,19 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {

// We don't rely on RepoAssignment here because:
// a) we don't need the git repo in this function
// OUT OF DATE: we do need the git repo to sync the branch to the db now.
// b) our update function will likely change the repository in the db so we will need to refresh it
// c) we don't always need the repo

ownerName := ctx.Params(":owner")
repoName := ctx.Params(":repo")

// defer getting the repository at this point - as we should only retrieve it if we're going to call update
var repo *repo_model.Repository
var (
repo *repo_model.Repository
gitRepo *git.Repository
)
defer gitRepo.Close() // it's safe to call Close on a nil pointer

updates := make([]*repo_module.PushUpdateOptions, 0, len(opts.OldCommitIDs))
wasEmpty := false
Expand Down Expand Up @@ -87,6 +94,63 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
})
return
}

branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates))
for _, update := range updates {
if !update.RefFullName.IsBranch() {
continue
}
if repo == nil {
repo = loadRepository(ctx, ownerName, repoName)
if ctx.Written() {
return
}
wasEmpty = repo.IsEmpty
}

if update.IsDelRef() {
if err := git_model.AddDeletedBranch(ctx, repo.ID, update.RefFullName.BranchName(), update.PusherID); err != nil {
log.Error("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
} else {
branchesToSync = append(branchesToSync, update)
}
}
if len(branchesToSync) > 0 {
if gitRepo == nil {
var err error
gitRepo, err = gitrepo.OpenRepository(ctx, repo)
if err != nil {
log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
wolfogre marked this conversation as resolved.
Show resolved Hide resolved

var (
branchNames = make([]string, 0, len(branchesToSync))
commitIDs = make([]string, 0, len(branchesToSync))
)
for _, update := range branchesToSync {
branchNames = append(branchNames, update.RefFullName.BranchName())
lunny marked this conversation as resolved.
Show resolved Hide resolved
commitIDs = append(commitIDs, update.NewCommitID)
}

if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, func(commitID string) (*git.Commit, error) {
return gitRepo.GetCommit(commitID)
}); err != nil {
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
}

// Handle Push Options
Expand Down
117 changes: 82 additions & 35 deletions services/repository/branch.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,44 +221,91 @@ func checkBranchName(ctx context.Context, repo *repo_model.Repository, name stri
return err
}

// syncBranchToDB sync the branch information in the database. It will try to update the branch first,
// if updated success with affect records > 0, then all are done. Because that means the branch has been in the database.
// If no record is affected, that means the branch does not exist in database. So there are two possibilities.
// One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced,
// then we need to sync all the branches into database.
func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error {
cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit)
if err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced.
return nil
}
// SyncBranchesToDB sync the branch information in the database.
// It will check whether the branches of the repository have never been synced before.
// If so, it will sync all branches of the repository.
// Otherwise, it will sync the branches that need to be updated.
func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error {
// Some designs that make the code look strange but are made for performance optimization purposes:
// 1. Sync branches in a batch to reduce the number of DB queries.
// 2. Lazy load commit information since it may be not necessary.
// 3. Exit early if synced all branches of git repo when there's no branch in DB.
// 4. Check the branches in DB if they are already synced.
//
// If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once.
// See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27
// For the first batch, it will hit optimization 3.
// For other batches, it will hit optimization 4.

if len(branchNames) != len(commitIDs) {
return fmt.Errorf("branchNames and commitIDs length not match")
}

return db.WithTx(ctx, func(ctx context.Context) error {
branches, err := git_model.GetBranches(ctx, repoID, branchNames)
if err != nil {
return fmt.Errorf("git_model.GetBranches: %v", err)
}

// if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21,
// we cannot simply insert the branch but need to check we have branches or not
hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{
RepoID: repoID,
IsDeletedBranch: optional.Some(false),
}.ToConds())
if err != nil {
return err
}
if !hasBranch {
if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil {
return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err)
if len(branches) == 0 {
// if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21,
// we cannot simply insert the branch but need to check we have branches or not
hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{
RepoID: repoID,
IsDeletedBranch: optional.Some(false),
}.ToConds())
if err != nil {
return err
}
if !hasBranch {
if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil {
return fmt.Errorf("repo_module.SyncRepoBranches %d failed: %v", repoID, err)
}
return nil
}
}
return nil
}

// if database have branches but not this branch, it means this is a new branch
return db.Insert(ctx, &git_model.Branch{
RepoID: repoID,
Name: branchName,
CommitID: commit.ID.String(),
CommitMessage: commit.Summary(),
PusherID: pusherID,
CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()),
branchMap := make(map[string]*git_model.Branch, len(branches))
for _, branch := range branches {
branchMap[branch.Name] = branch
}

newBranches := make([]*git_model.Branch, 0, len(branchNames))

for i, branchName := range branchNames {
commitID := commitIDs[i]
branch, exist := branchMap[branchName]
if exist && branch.CommitID == commitID {
continue
}

commit, err := getCommit(branchName)
if err != nil {
return fmt.Errorf("get commit of %s failed: %v", branchName, err)
}

if exist {
if _, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit); err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
return nil
}

// if database have branches but not this branch, it means this is a new branch
newBranches = append(newBranches, &git_model.Branch{
RepoID: repoID,
Name: branchName,
CommitID: commit.ID.String(),
CommitMessage: commit.Summary(),
PusherID: pusherID,
CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()),
})
}

if len(newBranches) > 0 {
return db.Insert(ctx, newBranches)
}
return nil
})
}

Expand Down
9 changes: 0 additions & 9 deletions services/repository/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"time"

"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/cache"
Expand Down Expand Up @@ -259,10 +258,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error {
commits.Commits = commits.Commits[:setting.UI.FeedMaxCommitNum]
}

if err = syncBranchToDB(ctx, repo.ID, opts.PusherID, branch, newCommit); err != nil {
return fmt.Errorf("git_model.UpdateBranch %s:%s failed: %v", repo.FullName(), branch, err)
}

notify_service.PushCommits(ctx, pusher, repo, opts, commits)

// Cache for big repository
Expand All @@ -275,10 +270,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error {
// close all related pulls
log.Error("close related pull request failed: %v", err)
}

if err := git_model.AddDeletedBranch(ctx, repo.ID, branch, pusher.ID); err != nil {
return fmt.Errorf("AddDeletedBranch %s:%s failed: %v", repo.FullName(), branch, err)
}
}

// Even if user delete a branch on a repository which he didn't watch, he will be watch that.
Expand Down
131 changes: 131 additions & 0 deletions tests/integration/git_push_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package integration

import (
"fmt"
"net/url"
"testing"

"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
repo_service "code.gitea.io/gitea/services/repository"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestGitPush(t *testing.T) {
onGiteaRun(t, testGitPush)
}

func testGitPush(t *testing.T, u *url.URL) {
t.Run("Push branches at once", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
pushed = append(pushed, branchName)
doGitCreateBranch(gitPath, branchName)(t)
}
pushed = append(pushed, "master")
doGitPushTestRepository(gitPath, "origin", "--all")(t)
return pushed, deleted
})
})

t.Run("Push branches one by one", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
doGitCreateBranch(gitPath, branchName)(t)
doGitPushTestRepository(gitPath, "origin", branchName)(t)
pushed = append(pushed, branchName)
}
return pushed, deleted
})
})

t.Run("Delete branches", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
doGitPushTestRepository(gitPath, "origin", "master")(t) // make sure master is the default branch instead of a branch we are going to delete
pushed = append(pushed, "master")

for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
pushed = append(pushed, branchName)
doGitCreateBranch(gitPath, branchName)(t)
}
doGitPushTestRepository(gitPath, "origin", "--all")(t)

for i := 0; i < 10; i++ {
branchName := fmt.Sprintf("branch-%d", i)
doGitPushTestRepository(gitPath, "origin", "--delete", branchName)(t)
deleted = append(deleted, branchName)
}
return pushed, deleted
})
})
}

func runTestGitPush(t *testing.T, u *url.URL, gitOperation func(t *testing.T, gitPath string) (pushed, deleted []string)) {
user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
repo, err := repo_service.CreateRepository(db.DefaultContext, user, user, repo_service.CreateRepoOptions{
Name: "repo-to-push",
Description: "test git push",
AutoInit: false,
DefaultBranch: "main",
IsPrivate: false,
})
require.NoError(t, err)
require.NotEmpty(t, repo)

gitPath := t.TempDir()

doGitInitTestRepository(gitPath)(t)

oldPath := u.Path
oldUser := u.User
defer func() {
u.Path = oldPath
u.User = oldUser
}()
u.Path = repo.FullName() + ".git"
u.User = url.UserPassword(user.LowerName, userPassword)

doGitAddRemote(gitPath, "origin", u)(t)

gitRepo, err := git.OpenRepository(git.DefaultContext, gitPath)
require.NoError(t, err)
defer gitRepo.Close()

pushedBranches, deletedBranches := gitOperation(t, gitPath)

dbBranches := make([]*git_model.Branch, 0)
require.NoError(t, db.GetEngine(db.DefaultContext).Where("repo_id=?", repo.ID).Find(&dbBranches))
assert.Equalf(t, len(pushedBranches), len(dbBranches), "mismatched number of branches in db")
dbBranchesMap := make(map[string]*git_model.Branch, len(dbBranches))
for _, branch := range dbBranches {
dbBranchesMap[branch.Name] = branch
}

deletedBranchesMap := make(map[string]bool, len(deletedBranches))
for _, branchName := range deletedBranches {
deletedBranchesMap[branchName] = true
}

for _, branchName := range pushedBranches {
branch, ok := dbBranchesMap[branchName]
deleted := deletedBranchesMap[branchName]
assert.True(t, ok, "branch %s not found in database", branchName)
assert.Equal(t, deleted, branch.IsDeleted, "IsDeleted of %s is %v, but it's expected to be %v", branchName, branch.IsDeleted, deleted)
commitID, err := gitRepo.GetBranchCommitID(branchName)
require.NoError(t, err)
assert.Equal(t, commitID, branch.CommitID)
}

require.NoError(t, repo_service.DeleteRepositoryDirectly(db.DefaultContext, user, repo.ID))
}
Loading