diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 6832207c0fc75..af3376e8d7127 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -10,6 +10,7 @@ import ( "bytes" "fmt" gohtml "html" + "io" "path/filepath" "strings" "sync" @@ -26,7 +27,7 @@ import ( ) // don't index files larger than this many bytes for performance purposes -const sizeLimit = 1000000 +const sizeLimit = 1024 * 1024 var ( // For custom user mapping @@ -46,7 +47,6 @@ func NewContext() { highlightMapping[keys[i].Name()] = keys[i].Value() } } - // The size 512 is simply a conservative rule of thumb c, err := lru.New2Q(512) if err != nil { @@ -60,7 +60,7 @@ func NewContext() { func Code(fileName, language, code string) string { NewContext() - // diff view newline will be passed as empty, change to literal \n so it can be copied + // diff view newline will be passed as empty, change to literal '\n' so it can be copied // preserve literal newline in blame view if code == "" || code == "\n" { return "\n" @@ -128,36 +128,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { return code } - htmlw.Flush() + _ = htmlw.Flush() // Chroma will add newlines for certain lexers in order to highlight them properly - // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output + // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output return strings.TrimSuffix(htmlbuf.String(), "\n") } -// File returns a slice of chroma syntax highlighted lines of code -func File(numLines int, fileName, language string, code []byte) []string { +// File returns a slice of chroma syntax highlighted HTML lines of code +func File(fileName, language string, code []byte) ([]string, error) { NewContext() if len(code) > sizeLimit { - return plainText(string(code), numLines) + return PlainText(code), nil } + formatter := html.New(html.WithClasses(true), html.WithLineNumbers(false), html.PreventSurroundingPre(true), ) - if formatter == nil { - log.Error("Couldn't create chroma formatter") - return plainText(string(code), numLines) - } - - htmlbuf := bytes.Buffer{} - htmlw := bufio.NewWriter(&htmlbuf) + htmlBuf := bytes.Buffer{} + htmlWriter := bufio.NewWriter(&htmlBuf) var lexer chroma.Lexer // provided language overrides everything - if len(language) > 0 { + if language != "" { lexer = lexers.Get(language) } @@ -168,9 +164,9 @@ func File(numLines int, fileName, language string, code []byte) []string { } if lexer == nil { - language := analyze.GetCodeLanguage(fileName, code) + guessLanguage := analyze.GetCodeLanguage(fileName, code) - lexer = lexers.Get(language) + lexer = lexers.Get(guessLanguage) if lexer == nil { lexer = lexers.Match(fileName) if lexer == nil { @@ -181,54 +177,43 @@ func File(numLines int, fileName, language string, code []byte) []string { iterator, err := lexer.Tokenise(nil, string(code)) if err != nil { - log.Error("Can't tokenize code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't tokenize code: %w", err) } - err = formatter.Format(htmlw, styles.GitHub, iterator) + err = formatter.Format(htmlWriter, styles.GitHub, iterator) if err != nil { - log.Error("Can't format code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't format code: %w", err) } - htmlw.Flush() - finalNewLine := false - if len(code) > 0 { - finalNewLine = code[len(code)-1] == '\n' - } + _ = htmlWriter.Flush() - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) { - content := v - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" - } else if content == `` { - content += "\n" - } else if content == `` { - content += "\n" - } - content = strings.TrimSuffix(content, ``) - content = strings.TrimPrefix(content, ``) - m = append(m, content) + // at the moment, Chroma generates stable output `...\n` for each line + htmlStr := htmlBuf.String() + lines := strings.Split(htmlStr, ``) + m := make([]string, 0, len(lines)) + for i := 1; i < len(lines); i++ { + line := lines[i] + line = strings.TrimSuffix(line, "") + m = append(m, line) } - if finalNewLine { - m = append(m, "\n") - } - - return m + return m, nil } -// return unhiglighted map -func plainText(code string, numLines int) []string { - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(code, "\n", numLines) { - content := v - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" +// PlainText returns non-highlighted HTML for code +func PlainText(code []byte) []string { + r := bufio.NewReader(bytes.NewReader(code)) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + for { + content, err := r.ReadString('\n') + if err != nil && err != io.EOF { + log.Error("failed to read string from buffer: %v", err) + break + } + if content == "" && err == io.EOF { + break } - m = append(m, gohtml.EscapeString(content)) + s := gohtml.EscapeString(content) + m = append(m, s) } return m } diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index e5dfedd2b3c8e..8f83f4a2f6128 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -8,97 +8,146 @@ import ( "strings" "testing" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) +func lines(s string) []string { + return strings.Split(strings.ReplaceAll(strings.TrimSpace(s), `\n`, "\n"), "\n") +} + func TestFile(t *testing.T) { - setting.Cfg = ini.Empty() tests := []struct { - name string - numLines int - fileName string - code string - want string + name string + code string + want []string }{ { - name: ".drone.yml", - numLines: 12, - fileName: ".drone.yml", - code: util.Dedent(` - kind: pipeline - name: default + name: "empty.py", + code: "", + want: lines(""), + }, + { + name: "tags.txt", + code: "<>", + want: lines("<>"), + }, + { + name: "tags.py", + code: "<>", + want: lines(`<>`), + }, + { + name: "eol-no.py", + code: "a=1", + want: lines(`a=1`), + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: lines(`a=1\n`), + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: lines(` +a=1\n +\n + `, + ), + }, + { + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), - want: util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def:\n + a=1\n +\n +b=''\n + \n +c=2`, + ), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out, err := File(tt.name, "", []byte(tt.code)) + assert.NoError(t, err) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.Equal(t, strings.Count(actual, "")) + assert.EqualValues(t, expected, actual) + }) + } +} + +func TestPlainText(t *testing.T) { + tests := []struct { + name string + code string + want []string + }{ + { + name: "empty.py", + code: "", + want: lines(""), + }, + { + name: "tags.py", + code: "<>", + want: lines("<>"), + }, + { + name: "eol-no.py", + code: "a=1", + want: lines(`a=1`), + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: lines(`a=1\n`), + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: lines(` +a=1\n +\n `), }, { - name: ".drone.yml - trailing space", - numLines: 13, - fileName: ".drone.yml", - code: strings.Replace(util.Dedent(` - kind: pipeline - name: default + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `)+"\n", "name: default", "name: default ", 1), - want: util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - - - - `), +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def:\n + a=1\n +\n +b=''\n + \n +c=2`), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n") - assert.Equal(t, tt.want, got) + out := PlainText([]byte(tt.code)) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.EqualValues(t, expected, actual) }) } } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 1a39b21c6ff80..c5657aa6756d2 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -15,7 +15,6 @@ import ( "net/http" "net/url" "path" - "strconv" "strings" "time" @@ -57,15 +56,6 @@ type namedBlob struct { blob *git.Blob } -func linesBytesCount(s []byte) int { - nl := []byte{'\n'} - n := bytes.Count(s, nl) - if len(s) > 0 && !bytes.HasSuffix(s, nl) { - n++ - } - return n -} - // FIXME: There has to be a more efficient way of doing this func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) { tree, err := commit.SubTree(treePath) @@ -555,8 +545,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ) } else { buf, _ := io.ReadAll(rd) - lineNums := linesBytesCount(buf) - ctx.Data["NumLines"] = strconv.Itoa(lineNums) + + // empty: 0 lines; "a": one line; "a\n": two lines; "a\nb": two lines; + // the NumLines is only used for the display on the UI: "xxx lines" + if len(buf) == 0 { + ctx.Data["NumLines"] = 0 + } else { + ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 + } ctx.Data["NumLinesSet"] = true language := "" @@ -584,7 +580,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st language = "" } } - fileContent := highlight.File(lineNums, blob.Name(), language, buf) + fileContent, err := highlight.File(blob.Name(), language, buf) + if err != nil { + log.Error("highlight.File failed, fallback to plain text: %v", err) + fileContent = highlight.PlainText(buf) + } status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) ctx.Data["EscapeStatus"] = status statuses := make([]charset.EscapeStatus, len(fileContent))