Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support repo code search without setting up an indexer #29998

Merged
merged 13 commits into from
Mar 24, 2024
6 changes: 6 additions & 0 deletions docs/content/administration/repo-indexer.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ menu:

# Repository indexer

## Builtin repository code search without indexer

Users could do repository-level code search without setting up a repository indexer.
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
Better code search support could be achieved by setting up the repository indexer.

## Setting up the repository indexer

Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):
Expand Down
3 changes: 3 additions & 0 deletions docs/content/installation/comparison.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ _Symbols used in table:_
| Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ |

- Gitea has builtin repository-level code search
- Better code search support could be achieved by [using a repository indexer](administration/repo-indexer.md)

## Issue Tracker

| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |
Expand Down
5 changes: 2 additions & 3 deletions modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,6 @@ type RunStdError interface {
error
Unwrap() error
Stderr() string
IsExitCode(code int) bool
}

type runStdError struct {
Expand All @@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
return r.stderr
}

func (r *runStdError) IsExitCode(code int) bool {
func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError
if errors.As(r.err, &exitError) {
if errors.As(err, &exitError) {
return exitError.ExitCode() == code
}
return false
Expand Down
8 changes: 4 additions & 4 deletions modules/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error {

func configSet(key, value string) error {
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
if err != nil && !err.IsExitCode(1) {
if err != nil && !IsErrorExitCode(err, 1) {
return fmt.Errorf("failed to get git config %s, err: %w", key, err)
}

Expand All @@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, set new config
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
Expand All @@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, add new config
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
Expand All @@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error {
}
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist
return nil
}
Expand Down
112 changes: 112 additions & 0 deletions modules/git/grep.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"

"code.gitea.io/gitea/modules/util"
)

type GrepResult struct {
Filename string
LineNumbers []int
LineCodes []string
}

type GrepOptions struct {
RefName string
ContextLineNumber int
IsFuzzy bool
}

func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()

/*
The output is like this ( "^@" means \x00):

HEAD:.air.toml
6^@bin = "gitea"

HEAD:.changelog.yml
2^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.IsFuzzy {
words := strings.Fields(search)
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
}
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: &stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer stdoutReader.Close()

isInBlock := false
scanner := bufio.NewScanner(stdoutReader)
var res *GrepResult
for scanner.Scan() {
line := scanner.Text()
if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true
res = &GrepResult{Filename: filename}
results = append(results, res)
}
continue
}
if line == "" {
if len(results) >= 50 {
cancel()
break
}
isInBlock = false
continue
}
if line == "--" {
continue
}
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
res.LineCodes = append(res.LineCodes, lineCode)
}
}
return scanner.Err()
},
})
// git grep exits with 1 if no results are found
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
return nil, nil
}
if err != nil && !errors.Is(err, context.Canceled) {
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
}
return results, nil
}
41 changes: 41 additions & 0 deletions modules/git/grep_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"context"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
)

func TestGrepSearch(t *testing.T) {
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
assert.NoError(t, err)
defer repo.Close()

res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
}, res)

res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err)
assert.Len(t, res, 0)

res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
assert.Error(t, err)
assert.Len(t, res, 0)
}
35 changes: 18 additions & 17 deletions modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil
}

func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(filename, "", code)
highlightedLines := strings.Split(string(hl), "\n")

// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
for i := 0; i < len(lines); i++ {
lines[i].Num = lineNums[i]
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}
return lines
}

func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")

var formattedLinesBuffer bytes.Buffer

contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lines := make([]ResultLine, 0, len(contentLines))
lineNums := make([]int, 0, len(contentLines))
index := startIndex
for i, line := range contentLines {
var err error
Expand All @@ -91,37 +105,24 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
line[closeActiveIndex:],
)
} else {
err = writeStrings(&formattedLinesBuffer,
line,
)
err = writeStrings(&formattedLinesBuffer, line)
}
if err != nil {
return nil, err
}

lines = append(lines, ResultLine{Num: startLineNum + i})
lineNums = append(lineNums, startLineNum+i)
index += len(line)
}

// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
highlightedLines := strings.Split(string(hl), "\n")

// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
lines = lines[:min(len(highlightedLines), len(lines))]
highlightedLines = highlightedLines[:len(lines)]
for i := 0; i < len(lines); i++ {
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}

return &Result{
RepoID: result.RepoID,
Filename: result.Filename,
CommitID: result.CommitID,
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
Lines: lines,
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
}, nil
}

Expand Down
1 change: 1 addition & 0 deletions options/locale/locale_en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ org_kind = Search orgs...
team_kind = Search teams...
code_kind = Search code...
code_search_unavailable = Code search is currently not available. Please contact the site administrator.
code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if site administrator enables Repository Indexer.
package_kind = Search packages...
project_kind = Search projects...
branch_kind = Search branches...
Expand Down
Loading