Skip to content

Commit

Permalink
Inherit submodules from template repository content (#16237)
Browse files Browse the repository at this point in the history
Fix #10316

---------

Signed-off-by: Steffen Schröter <steffen@vexar.de>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
  • Loading branch information
sschroe and wxiaoguang authored Jan 1, 2025
1 parent 92a2900 commit 57eb9d0
Show file tree
Hide file tree
Showing 17 changed files with 290 additions and 136 deletions.
6 changes: 3 additions & 3 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,15 @@ func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
return out
}

// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
//
// We don't attempt to convert the raw HASH to save a lot of time
func ParseTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
var readBytes []byte

// Read the Mode & fname
Expand All @@ -260,7 +260,7 @@ func ParseTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBu
}
idx := bytes.IndexByte(readBytes, ' ')
if idx < 0 {
log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes)
log.Debug("missing space in readBytes ParseCatFileTreeLine: %s", readBytes)
return mode, fname, sha, n, &ErrNotExist{}
}

Expand Down
78 changes: 78 additions & 0 deletions modules/git/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bytes"
"fmt"
"strconv"
"strings"

"code.gitea.io/gitea/modules/optional"
)

// sepSpace is the single-space separator used with bytes.Cut to split the attribute fields of an ls-tree line.
var sepSpace = []byte{' '}

// LsTreeEntry holds the fields parsed from one line of "git ls-tree" output.
type LsTreeEntry struct {
	// ID is the object id parsed from the <sha> field.
	ID ObjectID
	// EntryMode is the entry mode mapped from the <mode> field.
	EntryMode EntryMode
	// Name is the file name that follows the tab; it is unquoted when it starts with a double quote.
	Name string
	// Size is only set when the line carries a size field after the object id.
	Size optional.Option[int64]
}

// parseLsTreeLine parses a single line of "git ls-tree" output into an LsTreeEntry.
//
// Two layouts are accepted:
//
//	<mode> <type> <sha> <space-padded-size>\t<filename>
//	<mode> <type> <sha>\t<filename>
//
// An error is returned when the line contains no tab, the mode is not one of the
// known values, the object id is invalid, or a quoted file name cannot be unquoted.
func parseLsTreeLine(line []byte) (*LsTreeEntry, error) {
	tabIdx := bytes.IndexByte(line, '\t')
	if tabIdx < 0 {
		return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
	}
	attrs, rawName := line[:tabIdx], line[tabIdx+1:]

	// The attributes are separated by single spaces. The type field is dropped:
	// the mode is enough to determine the type.
	modeField, attrs, _ := bytes.Cut(attrs, sepSpace)
	_, attrs, _ = bytes.Cut(attrs, sepSpace)
	idField, attrs, _ := bytes.Cut(attrs, sepSpace)

	result := &LsTreeEntry{}
	if len(attrs) != 0 {
		// Whatever remains is the space-padded size; the parse error is ignored
		// and the size is still marked as present.
		parsed, _ := strconv.ParseInt(strings.TrimSpace(string(attrs)), 10, 64)
		result.Size = optional.Some(parsed)
	}

	switch mode := string(modeField); mode {
	case "100644":
		result.EntryMode = EntryModeBlob
	case "100755":
		result.EntryMode = EntryModeExec
	case "120000":
		result.EntryMode = EntryModeSymlink
	case "160000":
		result.EntryMode = EntryModeCommit
	case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
		result.EntryMode = EntryModeTree
	default:
		return nil, fmt.Errorf("unknown type: %v", mode)
	}

	id, err := NewIDFromString(string(idField))
	if err != nil {
		return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
	}
	result.ID = id

	// A name that does not start with a double quote is taken verbatim.
	if len(rawName) == 0 || rawName[0] != '"' {
		result.Name = string(rawName)
		return result, nil
	}

	name, err := strconv.Unquote(string(rawName))
	if err != nil {
		return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
	}
	result.Name = name
	return result, nil
}
67 changes: 12 additions & 55 deletions modules/git/parse_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ import (
"bytes"
"fmt"
"io"
"strconv"
"strings"

"code.gitea.io/gitea/modules/log"
)
Expand All @@ -21,71 +19,30 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
return parseTreeEntries(data, nil)
}

var sepSpace = []byte{' '}

// parseTreeEntries FIXME this function's design is not right, it should not make the caller read all data into memory
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
var err error
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
for pos := 0; pos < len(data); {
// expect line to be of the form:
// <mode> <type> <sha> <space-padded-size>\t<filename>
// <mode> <type> <sha>\t<filename>
posEnd := bytes.IndexByte(data[pos:], '\n')
if posEnd == -1 {
posEnd = len(data)
} else {
posEnd += pos
}
line := data[pos:posEnd]
posTab := bytes.IndexByte(line, '\t')
if posTab == -1 {
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
}

entry := new(TreeEntry)
entry.ptree = ptree

entryAttrs := line[:posTab]
entryName := line[posTab+1:]

entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
if len(entryAttrs) > 0 {
entrySize := entryAttrs // the last field is the space-padded-size
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
entry.sized = true
}

switch string(entryMode) {
case "100644":
entry.entryMode = EntryModeBlob
case "100755":
entry.entryMode = EntryModeExec
case "120000":
entry.entryMode = EntryModeSymlink
case "160000":
entry.entryMode = EntryModeCommit
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
entry.entryMode = EntryModeTree
default:
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
}

entry.ID, err = NewIDFromString(string(entryObjectID))
line := data[pos:posEnd]
lsTreeLine, err := parseLsTreeLine(line)
if err != nil {
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
return nil, err
}

if len(entryName) > 0 && entryName[0] == '"' {
entry.name, err = strconv.Unquote(string(entryName))
if err != nil {
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
}
} else {
entry.name = string(entryName)
entry := &TreeEntry{
ptree: ptree,
ID: lsTreeLine.ID,
entryMode: lsTreeLine.EntryMode,
name: lsTreeLine.Name,
size: lsTreeLine.Size.Value(),
sized: lsTreeLine.Size.Has(),
}

pos = posEnd + 1
entries = append(entries, entry)
}
Expand All @@ -100,7 +57,7 @@ func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.

loop:
for sz > 0 {
mode, fname, sha, count, err := ParseTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf)
mode, fname, sha, count, err := ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf)
if err != nil {
if err == io.EOF {
break loop
Expand Down
2 changes: 1 addition & 1 deletion modules/git/pipeline/lfs_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
case "tree":
var n int64
for n < size {
mode, fname, binObjectID, count, err := git.ParseTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf)
mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf)
if err != nil {
return nil, err
}
Expand Down
66 changes: 66 additions & 0 deletions modules/git/submodule.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"context"
"fmt"
"os"

"code.gitea.io/gitea/modules/log"
)

// TemplateSubmoduleCommit pairs a submodule path of a template repository with the commit it points to.
type TemplateSubmoduleCommit struct {
	// Path is the submodule's path inside the repository tree.
	Path string
	// Commit is the string form of the commit id recorded for the submodule.
	Commit string
}

// GetTemplateSubmoduleCommits returns the paths of the submodules found at HEAD of the repository
// at repoPath, together with the commit each of them points to.
// It runs "git ls-tree -r -- HEAD" and keeps the entries whose mode is EntryModeCommit.
// This function is only meant for generating new repos based on an existing template, so the
// template is expected not to be too large.
// An error is returned when the pipe cannot be created, a line cannot be parsed or git fails.
func GetTemplateSubmoduleCommits(ctx context.Context, repoPath string) (submoduleCommits []TemplateSubmoduleCommit, _ error) {
	stdoutReader, stdoutWriter, err := os.Pipe()
	if err != nil {
		return nil, err
	}
	// Make sure neither end of the pipe leaks in case Run returns without having invoked
	// PipelineFunc (presumably possible when the command cannot be started; Run is not visible here).
	// Closing an already closed *os.File only returns an error, so this is harmless after a normal run.
	defer func() {
		_ = stdoutWriter.Close()
		_ = stdoutReader.Close()
	}()

	opts := &RunOpts{
		Dir:    repoPath,
		Stdout: stdoutWriter,
		PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
			// Drop our copy of the write end so that the scanner can reach EOF
			// once the command has finished writing.
			_ = stdoutWriter.Close()
			defer stdoutReader.Close()

			scanner := bufio.NewScanner(stdoutReader)
			for scanner.Scan() {
				entry, err := parseLsTreeLine(scanner.Bytes())
				if err != nil {
					cancel()
					return err
				}
				// only the entries with the commit mode (160000) are submodules
				if entry.EntryMode == EntryModeCommit {
					submoduleCommits = append(submoduleCommits, TemplateSubmoduleCommit{Path: entry.Name, Commit: entry.ID.String()})
				}
			}
			return scanner.Err()
		},
	}
	err = NewCommand(ctx, "ls-tree", "-r", "--", "HEAD").Run(opts)
	if err != nil {
		// wrap with %w so that callers can still inspect the underlying error
		return nil, fmt.Errorf("GetTemplateSubmoduleCommits: error running git ls-tree: %w", err)
	}
	return submoduleCommits, nil
}

// AddTemplateSubmoduleIndexes adds each of the given submodules to the git index of the repository
// at repoPath by running "git update-index --add --cacheinfo 160000 <commit> <path>".
// It is only for generating new repos based on an existing template and requires the .gitmodules
// file to be already present in the work dir.
// Processing stops at the first failing command: the failure is logged and its error returned.
func AddTemplateSubmoduleIndexes(ctx context.Context, repoPath string, submodules []TemplateSubmoduleCommit) error {
	for i := range submodules {
		sm := submodules[i]

		base := NewCommand(ctx, "update-index", "--add", "--cacheinfo", "160000")
		cmd := base.AddDynamicArguments(sm.Commit, sm.Path)

		stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repoPath})
		if err == nil {
			continue
		}
		log.Error("Unable to add %s as submodule to repo %s: stdout %s\nError: %v", sm.Path, repoPath, stdout, err)
		return err
	}
	return nil
}
48 changes: 48 additions & 0 deletions modules/git/submodule_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestGetTemplateSubmoduleCommits checks that both submodules of the repo4_submodules fixture
// are reported with their paths and commits.
func TestGetTemplateSubmoduleCommits(t *testing.T) {
	testRepoPath := filepath.Join(testReposDir, "repo4_submodules")
	submodules, err := GetTemplateSubmoduleCommits(DefaultContext, testRepoPath)
	require.NoError(t, err)

	// require (not assert): a wrong length must stop the test here, otherwise the
	// index expressions below would panic instead of reporting a readable failure
	require.Len(t, submodules, 2)

	assert.EqualValues(t, "<°)))><", submodules[0].Path)
	assert.EqualValues(t, "d2932de67963f23d43e1c7ecf20173e92ee6c43c", submodules[0].Commit)

	assert.EqualValues(t, "libtest", submodules[1].Path)
	assert.EqualValues(t, "1234567890123456789012345678901234567890", submodules[1].Commit)
}

// TestAddTemplateSubmoduleIndexes adds a submodule entry to the index of a fresh repository,
// commits it and checks that GetTemplateSubmoduleCommits reports it back.
func TestAddTemplateSubmoduleIndexes(t *testing.T) {
	ctx := context.Background()
	tmpDir := t.TempDir()
	var err error
	_, _, err = NewCommand(ctx, "init").RunStdString(&RunOpts{Dir: tmpDir})
	require.NoError(t, err)
	// fail right away if the directory cannot be created instead of hiding the cause
	err = os.Mkdir(filepath.Join(tmpDir, "new-dir"), 0o755)
	require.NoError(t, err)
	err = AddTemplateSubmoduleIndexes(ctx, tmpDir, []TemplateSubmoduleCommit{{Path: "new-dir", Commit: "1234567890123456789012345678901234567890"}})
	require.NoError(t, err)
	_, _, err = NewCommand(ctx, "add", "--all").RunStdString(&RunOpts{Dir: tmpDir})
	require.NoError(t, err)
	_, _, err = NewCommand(ctx, "-c", "user.name=a", "-c", "user.email=b", "commit", "-m=test").RunStdString(&RunOpts{Dir: tmpDir})
	require.NoError(t, err)
	// use the same context as every other command of this test
	submodules, err := GetTemplateSubmoduleCommits(ctx, tmpDir)
	require.NoError(t, err)
	// require (not assert): stop before indexing when the length is wrong
	require.Len(t, submodules, 1)
	assert.EqualValues(t, "new-dir", submodules[0].Path)
	assert.EqualValues(t, "1234567890123456789012345678901234567890", submodules[0].Commit)
}
1 change: 1 addition & 0 deletions modules/git/tests/repos/repo4_submodules/HEAD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ref: refs/heads/master
4 changes: 4 additions & 0 deletions modules/git/tests/repos/repo4_submodules/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[core]
repositoryformatversion = 0
filemode = true
bare = true
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
x��[
�0E��*�_��$M�5tifBk Iŕ�7�k~��9ܘ��ܠ���.j�� �O� ��"z�`�#I�irF��͹��$%����|4)��?t��=��:K��#[$D����^�����Ӓy�HU/�f?G
1 change: 1 addition & 0 deletions modules/git/tests/repos/repo4_submodules/refs/heads/master
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e1e59caba97193d48862d6809912043871f37437
4 changes: 2 additions & 2 deletions modules/git/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func NewTree(repo *Repository, id ObjectID) *Tree {
}
}

// SubTree get a sub tree by the sub dir path
// SubTree get a subtree by the sub dir path
func (t *Tree) SubTree(rpath string) (*Tree, error) {
if len(rpath) == 0 {
return t, nil
Expand Down Expand Up @@ -63,7 +63,7 @@ func (repo *Repository) LsTree(ref string, filenames ...string) ([]string, error
return filelist, err
}

// GetTreePathLatestCommitID returns the latest commit of a tree path
// GetTreePathLatestCommit returns the latest commit of a tree path
func (repo *Repository) GetTreePathLatestCommit(refName, treePath string) (*Commit, error) {
stdout, _, err := NewCommand(repo.Ctx, "rev-list", "-1").
AddDynamicArguments(refName).AddDashesAndList(treePath).
Expand Down
1 change: 0 additions & 1 deletion modules/git/tree_blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ func (t *Tree) GetTreeEntryByPath(relpath string) (*TreeEntry, error) {
ptree: t,
ID: t.ID,
name: "",
fullName: "",
entryMode: EntryModeTree,
}, nil
}
Expand Down
12 changes: 3 additions & 9 deletions modules/git/tree_entry_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,17 @@ import "code.gitea.io/gitea/modules/log"

// TreeEntry the leaf in the git tree
type TreeEntry struct {
ID ObjectID

ID ObjectID
ptree *Tree

entryMode EntryMode
name string

size int64
sized bool
fullName string
size int64
sized bool
}

// Name returns the name of the entry
func (te *TreeEntry) Name() string {
if te.fullName != "" {
return te.fullName
}
return te.name
}

Expand Down
Loading

0 comments on commit 57eb9d0

Please sign in to comment.