diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index e10c4f7582e91..9a5d19074595f 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1212,6 +1212,9 @@ LEVEL = Info ;; Max size of files to be displayed (default is 8MiB) ;MAX_DISPLAY_FILE_SIZE = 8388608 ;; +;; Detect ambiguous unicode characters in file contents and show warnings on the UI +;AMBIGUOUS_UNICODE_DETECTION = true +;; ;; Whether the email of the user should be shown in the Explore Users page ;SHOW_USER_EMAIL = true ;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index c9e6a937c3482..8c65238de9ec6 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -220,6 +220,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a - `THEMES`: **gitea-auto,gitea-light,gitea-dark**: All available themes. Allows users to select personalized themes regardless of the value of `DEFAULT_THEME`. - `MAX_DISPLAY_FILE_SIZE`: **8388608**: Max size of files to be displayed (default is 8MiB) +- `AMBIGUOUS_UNICODE_DETECTION`: **true**: Detect ambiguous unicode characters in file contents and show warnings on the UI - `REACTIONS`: All available reactions users can choose on issues/prs and comments Values can be emoji alias (:smile:) or a unicode emoji. 
For custom reactions, add a tightly cropped square image to public/assets/img/emoji/reaction_name.png diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 5608836a4510e..92e417d1f7283 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -8,11 +8,12 @@ package charset import ( - "bufio" + "html/template" "io" "strings" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" ) @@ -20,20 +21,18 @@ import ( const RuneNBSP = 0xa0 // EscapeControlHTML escapes the unicode control sequences in a provided html document -func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { +func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { sb := &strings.Builder{} - outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - if err := StreamHTML(strings.NewReader(text), streamer); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - } - return streamer.escaped, sb.String() + escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) 
// err has been handled in EscapeControlReader + return escaped, template.HTML(sb.String()) } -// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte +// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { + if !setting.UI.AmbiguousUnicodeDetection { + _, err = io.Copy(writer, reader) + return &EscapeStatus{}, err + } outputStream := &HTMLStreamerWriter{Writer: writer} streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) @@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation. } return streamer.escaped, err } - -// EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method. 
-func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { - bufRd := bufio.NewReader(reader) - outputStream := &HTMLStreamerWriter{Writer: writer} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - for { - line, rdErr := bufRd.ReadString('\n') - if len(line) > 0 { - if err := streamer.Text(line); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - return streamer.escaped, err - } - } - if rdErr != nil { - if rdErr != io.EOF { - err = rdErr - } - break - } - } - return streamer.escaped, err -} - -// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { - sb := &strings.Builder{} - outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) - - if err := streamer.Text(text); err != nil { - streamer.escaped.HasError = true - log.Error("Error whilst escaping: %v", err) - } - return streamer.escaped, sb.String() -} diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 03d4cfc0c17bc..3f08fd94a497f 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error { until, next = nextIdxs[0]+pos, nextIdxs[1]+pos } - // from pos until until we know that the runes are not \r\t\n or even ' ' + // from pos until we know that the runes are not \r\t\n or even ' ' runes := make([]rune, 0, next-until) positions := make([]int, 0, next-until+1) diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index f63c5c5c52b32..83fa7b18513b4 100644 --- a/modules/charset/escape_test.go +++ 
b/modules/charset/escape_test.go @@ -132,20 +132,6 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, }, } -func TestEscapeControlString(t *testing.T) { - for _, tt := range escapeControlTests { - t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlString(tt.text, &translation.MockLocale{}) - if !reflect.DeepEqual(*status, tt.status) { - t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) - } - if result != tt.result { - t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - func TestEscapeControlReader(t *testing.T) { // lets add some control characters to the tests tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) @@ -186,12 +172,3 @@ func TestEscapeControlReader(t *testing.T) { }) } } - -func TestEscapeControlReader_panic(t *testing.T) { - bs := make([]byte, 0, 20479) - bs = append(bs, 'A') - for i := 0; i < 6826; i++ { - bs = append(bs, []byte("—")...) - } - _, _ = EscapeControlString(string(bs), &translation.MockLocale{}) -} diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index a67217e864675..d7ab3f7afd3e7 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -9,6 +9,7 @@ import ( "bytes" "fmt" gohtml "html" + "html/template" "io" "path/filepath" "strings" @@ -55,7 +56,7 @@ func NewContext() { } // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name -func Code(fileName, language, code string) (string, string) { +func Code(fileName, language, code string) (output template.HTML, lexerName string) { NewContext() // diff view newline will be passed as empty, change to literal '\n' so it can be copied @@ -65,7 +66,7 @@ func Code(fileName, language, code string) (string, string) { } if len(code) > sizeLimit { - return code, "" + return template.HTML(template.HTMLEscapeString(code)), "" } var lexer chroma.Lexer @@ -102,13 +103,11 @@ 
func Code(fileName, language, code string) (string, string) { cache.Add(fileName, lexer) } - lexerName := formatLexerName(lexer.Config().Name) - - return CodeFromLexer(lexer, code), lexerName + return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name) } // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes -func CodeFromLexer(lexer chroma.Lexer, code string) string { +func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML { formatter := html.New(html.WithClasses(true), html.WithLineNumbers(false), html.PreventSurroundingPre(true), @@ -120,23 +119,23 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { iterator, err := lexer.Tokenise(nil, code) if err != nil { log.Error("Can't tokenize code: %v", err) - return code + return template.HTML(template.HTMLEscapeString(code)) } // style not used for live site but need to pass something err = formatter.Format(htmlw, githubStyles, iterator) if err != nil { log.Error("Can't format code: %v", err) - return code + return template.HTML(template.HTMLEscapeString(code)) } _ = htmlw.Flush() // Chroma will add newlines for certain lexers in order to highlight them properly // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output - return strings.TrimSuffix(htmlbuf.String(), "\n") + return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n")) } // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name -func File(fileName, language string, code []byte) ([]string, string, error) { +func File(fileName, language string, code []byte) ([]template.HTML, string, error) { NewContext() if len(code) > sizeLimit { @@ -183,14 +182,14 @@ func File(fileName, language string, code []byte) ([]string, string, error) { tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens()) htmlBuf := &bytes.Buffer{} - lines := make([]string, 0, len(tokensLines)) + lines := make([]template.HTML, 0, 
len(tokensLines)) for _, tokens := range tokensLines { iterator = chroma.Literator(tokens...) err = formatter.Format(htmlBuf, githubStyles, iterator) if err != nil { return nil, "", fmt.Errorf("can't format code: %w", err) } - lines = append(lines, htmlBuf.String()) + lines = append(lines, template.HTML(htmlBuf.String())) htmlBuf.Reset() } @@ -198,9 +197,9 @@ func File(fileName, language string, code []byte) ([]string, string, error) { } // PlainText returns non-highlighted HTML for code -func PlainText(code []byte) []string { +func PlainText(code []byte) []template.HTML { r := bufio.NewReader(bytes.NewReader(code)) - m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1) for { content, err := r.ReadString('\n') if err != nil && err != io.EOF { @@ -210,7 +209,7 @@ func PlainText(code []byte) []string { if content == "" && err == io.EOF { break } - s := gohtml.EscapeString(content) + s := template.HTML(gohtml.EscapeString(content)) m = append(m, s) } return m diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 7a9887728f18d..31d8f9e6312d6 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -4,21 +4,36 @@ package highlight import ( + "html/template" "strings" "testing" "github.com/stretchr/testify/assert" ) -func lines(s string) []string { - return strings.Split(strings.ReplaceAll(strings.TrimSpace(s), `\n`, "\n"), "\n") +func lines(s string) (out []template.HTML) { + // "" => no line, "a" => 1 line, "a\n" => 1 line, "a\nb" => 2 lines, "a\nb\n" => 2 lines (each line always ends with "\n") + out = make([]template.HTML, 0) + s = strings.ReplaceAll(strings.ReplaceAll(strings.TrimSpace(s), "\n", ""), `\n`, "\n") + for { + if p := strings.IndexByte(s, '\n'); p != -1 { + out = append(out, template.HTML(s[:p+1])) + s = s[p+1:] + } else { + break + } + } + if s != "" { + out = append(out, template.HTML(s)) + } + return out } 
func TestFile(t *testing.T) { tests := []struct { name string code string - want []string + want []template.HTML lexerName string }{ { @@ -99,10 +114,7 @@ c=2 t.Run(tt.name, func(t *testing.T) { out, lexerName, err := File(tt.name, "", []byte(tt.code)) assert.NoError(t, err) - expected := strings.Join(tt.want, "\n") - actual := strings.Join(out, "\n") - assert.Equal(t, strings.Count(actual, "")) - assert.EqualValues(t, expected, actual) + assert.EqualValues(t, tt.want, out) assert.Equal(t, tt.lexerName, lexerName) }) } @@ -112,7 +124,7 @@ func TestPlainText(t *testing.T) { tests := []struct { name string code string - want []string + want []template.HTML }{ { name: "empty.py", @@ -165,9 +177,7 @@ c=2`), for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { out := PlainText([]byte(tt.code)) - expected := strings.Join(tt.want, "\n") - actual := strings.Join(out, "\n") - assert.EqualValues(t, expected, actual) + assert.EqualValues(t, tt.want, out) }) } } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index fdb468df1ab5b..e19e22eea0e1e 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -6,6 +6,7 @@ package code import ( "bytes" "context" + "html/template" "strings" "code.gitea.io/gitea/modules/highlight" @@ -22,7 +23,7 @@ type Result struct { Language string Color string LineNumbers []int - FormattedLines string + FormattedLines template.HTML } type SearchResultLanguages = internal.SearchResultLanguages diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index c1e0144199323..e7af02b49670f 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -87,7 +87,7 @@ func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error } lexer = chroma.Coalesce(lexer) - if _, err := w.WriteString(highlight.CodeFromLexer(lexer, source)); err != nil { + if _, err := w.WriteString(string(highlight.CodeFromLexer(lexer, source))); err != 
nil { return "" } } diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 31042d3ee0dda..f94e6206cd995 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -35,6 +35,8 @@ var UI = struct { OnlyShowRelevantRepos bool ExploreDefaultSort string `ini:"EXPLORE_PAGING_DEFAULT_SORT"` + AmbiguousUnicodeDetection bool + Notification struct { MinTimeout time.Duration TimeoutStep time.Duration @@ -82,6 +84,9 @@ var UI = struct { Reactions: []string{`+1`, `-1`, `laugh`, `hooray`, `confused`, `heart`, `rocket`, `eyes`}, CustomEmojis: []string{`git`, `gitea`, `codeberg`, `gitlab`, `github`, `gogs`}, CustomEmojisMap: map[string]string{"git": ":git:", "gitea": ":gitea:", "codeberg": ":codeberg:", "gitlab": ":gitlab:", "github": ":github:", "gogs": ":gogs:"}, + + AmbiguousUnicodeDetection: true, + Notification: struct { MinTimeout time.Duration TimeoutStep time.Duration diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 52d350ff665a0..f07086ef1c854 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -310,8 +310,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m lexerName = lexerNameForLine } - br.EscapeStatus, line = charset.EscapeControlHTML(line, ctx.Locale) - br.Code = gotemplate.HTML(line) + br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) rows = append(rows, br) escapeStatus = escapeStatus.Or(br.EscapeStatus) } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 70556185bb53b..48e2e20397232 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -9,6 +9,7 @@ import ( gocontext "context" "encoding/base64" "fmt" + "html/template" "image" "io" "net/http" @@ -17,6 +18,7 @@ import ( "slices" "strings" "time" + "unsafe" _ "image/gif" // for processing gif images _ "image/jpeg" // for processing jpeg images @@ -317,19 +319,18 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr }, rd) if err != 
nil { log.Error("Render failed for %s in %-v: %v Falling back to rendering source", readmeFile.Name(), ctx.Repo.Repository, err) - buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlStringReader(rd, buf, ctx.Locale) - ctx.Data["FileContent"] = buf.String() + delete(ctx.Data, "IsMarkup") } - } else { + } + + if ctx.Data["IsMarkup"] != true { ctx.Data["IsPlainText"] = true - buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], err = charset.EscapeControlStringReader(rd, buf, ctx.Locale) + content, err := io.ReadAll(rd) if err != nil { - log.Error("Read failed: %v", err) + log.Error("Read readme content failed: %v", err) } - - ctx.Data["FileContent"] = buf.String() + contentEscaped := template.HTMLEscapeString(unsafe.String(unsafe.SliceData(content), len(content))) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(template.HTML(contentEscaped), ctx.Locale) } if !fInfo.isLFSFile && ctx.Repo.CanEnableEditor(ctx, ctx.Doer) { @@ -495,7 +496,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st // The Open Group Base Specification: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html // empty: 0 lines; "a": 1 incomplete-line; "a\n": 1 line; "a\nb": 1 line, 1 incomplete-line; // Gitea uses the definition (like most modern editors): // empty: 0 lines; "a": 1 line; "a\n": 2 lines; "a\nb": 2 lines; // When rendering, the last empty line is not rendered in UI, while the line-number is still counted, to tell users that the file contains a trailing EOL. // To make the UI more consistent, it could use an icon mark to indicate that there is no trailing EOL, and show line-number as the rendered lines. 
// This NumLines is only used for the display on the UI: "xxx lines" @@ -620,7 +621,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } } -func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output string, err error) { +func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output template.HTML, err error) { markupRd, markupWr := io.Pipe() defer markupWr.Close() done := make(chan struct{}) @@ -628,7 +629,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i sb := &strings.Builder{} // We allow NBSP here this is rendered escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) - output = sb.String() + output = template.HTML(sb.String()) close(done) }() err = markup.Render(renderCtx, input, markupWr) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 8bf6cba844d6b..6f3a46f30a6ee 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -285,15 +285,15 @@ type DiffInline struct { // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { - status, content := charset.EscapeControlHTML(string(s), locale) - return DiffInline{EscapeStatus: status, Content: template.HTML(content)} + status, content := charset.EscapeControlHTML(s, locale) + return DiffInline{EscapeStatus: status, Content: content} } // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { highlighted, _ := highlight.Code(fileName, language, code) status, content := charset.EscapeControlHTML(highlighted, locale) - return DiffInline{EscapeStatus: status, Content: 
template.HTML(content)} + return DiffInline{EscapeStatus: status, Content: content} } // GetComputedInlineDiffFor computes inline diff for the given line. diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index f1e2b1d3cb31a..35d48445504ae 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -93,10 +93,10 @@ func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB highlightCodeA, _ := highlight.Code(filename, language, codeA) highlightCodeB, _ := highlight.Code(filename, language, codeB) - highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) - highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA)) + convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB)) - diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) + diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true) diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) for i := range diffs { diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 4129a133b7e76..e7d1c04c12ef2 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -74,9 +74,9 @@ {{end}}
{{if .IsMarkup}} - {{if .FileContent}}{{.FileContent | Safe}}{{end}} + {{if .FileContent}}{{.FileContent}}{{end}} {{else if .IsPlainText}} -
{{if .FileContent}}{{.FileContent | Safe}}{{end}}
+
{{if .FileContent}}{{.FileContent}}{{end}}
{{else if not .IsTextSource}}
{{if .IsImageFile}} @@ -114,7 +114,7 @@ {{if $.EscapeStatus.Escaped}} {{if (index $.LineEscapeStatus $idx).Escaped}}{{end}} {{end}} - {{$code | Safe}} + {{$code}} {{end}}