Skip to content

Commit

Permalink
Fixes go-gitea#16558 CSV delimiter determiner
Browse files Browse the repository at this point in the history
  • Loading branch information
richmahn committed Oct 25, 2021
1 parent 07c7100 commit fc8dd28
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 13 deletions.
29 changes: 26 additions & 3 deletions modules/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
stdcsv "encoding/csv"
"errors"
"io"
"path/filepath"
"regexp"
"strings"

"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
)
Expand All @@ -26,8 +28,8 @@ func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
return rd
}

// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
// CreateReaderAndDetermineDelimiter determines the field delimiter from the file extension in ctx (if any), otherwise guesses it from the content, and creates a csv.Reader.
func CreateReaderAndDetermineDelimiter(ctx *markup.RenderContext, rd io.Reader) (*stdcsv.Reader, error) {
var data = make([]byte, 1e4)
size, err := rd.Read(data)
if err != nil {
Expand All @@ -37,7 +39,7 @@ func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
return nil, err
}

delimiter := guessDelimiter(data[:size])
delimiter := determineDelimiter(ctx, data[:size])

var newInput io.Reader
if size < 1e4 {
Expand All @@ -49,6 +51,27 @@ func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
return CreateReader(newInput, delimiter), nil
}

// determineDelimiter picks the field delimiter to use for data.
//
// When ctx is non-nil and the extension of ctx.Filename (compared
// case-insensitively) implies a delimiter, that delimiter is returned:
// ".tsv" yields a tab and ".psv" yields a pipe. In every other case,
// including a nil ctx, the result of guessDelimiter(data) is returned.
func determineDelimiter(ctx *markup.RenderContext, data []byte) rune {
	if ctx != nil {
		// Only these extensions fix the delimiter; anything else falls
		// through to content-based guessing below.
		switch strings.ToLower(filepath.Ext(ctx.Filename)) {
		case ".tsv":
			return '\t'
		case ".psv":
			return '|'
		}
	}

	return guessDelimiter(data)
}

// guessDelimiter scores the input CSV data against delimiters, and returns the best match.
// Reads at most 10k bytes & 10 lines.
func guessDelimiter(data []byte) rune {
Expand Down
5 changes: 3 additions & 2 deletions modules/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ func TestCreateReader(t *testing.T) {
assert.Equal(t, ',', rd.Comma)
}

func TestCreateReaderAndGuessDelimiter(t *testing.T) {
func TestCreateReaderAndDetermineDelimiter(t *testing.T) {
input := "a;b;c\n1;2;3\n4;5;6"

rd, err := CreateReaderAndGuessDelimiter(strings.NewReader(input))
rd, err := CreateReaderAndDetermineDelimiter(nil, strings.NewReader(input))
assert.NoError(t, err)
assert.Equal(t, ';', rd.Comma)
}
Expand All @@ -35,6 +35,7 @@ func TestGuessDelimiter(t *testing.T) {
"1,2,3;4,5,6;7,8,9\na;b;c": ';',
"\"1,2,3,4\";\"a\nb\"\nc;d": ';',
"<br/>": ',',
"name\temail\tnote\nJohn Doe\tjohn@doe.com\tThis,note,had,a,lot,of,commas,to,test,delimters": '\t',
}

for k, v := range kases {
Expand Down
9 changes: 5 additions & 4 deletions routers/web/repo/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/upload"
"code.gitea.io/gitea/modules/util"
Expand Down Expand Up @@ -105,7 +106,7 @@ func setCsvCompareContext(ctx *context.Context) {

errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))

csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) {
csvReaderFromCommit := func(ctx *markup.RenderContext, c *git.Commit) (*csv.Reader, error) {
blob, err := c.GetBlobByPath(diffFile.Name)
if err != nil {
return nil, err
Expand All @@ -121,14 +122,14 @@ func setCsvCompareContext(ctx *context.Context) {
}
defer reader.Close()

return csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader))
return csv_module.CreateReaderAndGuessDelimiter(ctx, charset.ToUTF8WithFallbackReader(reader))
}

baseReader, err := csvReaderFromCommit(baseCommit)
baseReader, err := csvReaderFromCommit(&markup.RenderContext{Filename: diffFile.OldName}, baseCommit)
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
headReader, err := csvReaderFromCommit(headCommit)
headReader, err := csvReaderFromCommit(&markup.RenderContext{Filename: diffFile.Name}, headCommit)
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
Expand Down
8 changes: 4 additions & 4 deletions services/gitdiff/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,16 @@ c,d,e`,

var baseReader *csv.Reader
if len(c.base) > 0 {
baseReader, err = csv_module.CreateReaderAndGuessDelimiter(strings.NewReader(c.base))
baseReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.base))
if err != nil {
t.Errorf("CreateReaderAndGuessDelimiter failed: %s", err)
t.Errorf("CreateReaderAndDetermineDelimiter failed: %s", err)
}
}
var headReader *csv.Reader
if len(c.head) > 0 {
headReader, err = csv_module.CreateReaderAndGuessDelimiter(strings.NewReader(c.head))
headReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.head))
if err != nil {
t.Errorf("CreateReaderAndGuessDelimiter failed: %s", err)
t.Errorf("CreateReaderAndDetermineDelimiter failed: %s", err)
}
}

Expand Down

0 comments on commit fc8dd28

Please sign in to comment.