55package repofiles
66
77import (
8+ "bytes"
89 "fmt"
910 "path"
1011 "strings"
1112
13+ "golang.org/x/net/html/charset"
14+ "golang.org/x/text/transform"
15+
1216 "code.gitea.io/gitea/models"
17+ "code.gitea.io/gitea/modules/base"
1318 "code.gitea.io/gitea/modules/git"
1419 "code.gitea.io/gitea/modules/lfs"
20+ "code.gitea.io/gitea/modules/log"
1521 "code.gitea.io/gitea/modules/setting"
1622 "code.gitea.io/sdk/gitea"
1723)
@@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
3743 Committer * IdentityOptions
3844}
3945
46+ func detectEncodingAndBOM (entry * git.TreeEntry , repo * models.Repository ) (string , bool ) {
47+ reader , err := entry .Blob ().DataAsync ()
48+ if err != nil {
49+ // return default
50+ return "UTF-8" , false
51+ }
52+ defer reader .Close ()
53+ buf := make ([]byte , 1024 )
54+ n , err := reader .Read (buf )
55+ if err != nil {
56+ // return default
57+ return "UTF-8" , false
58+ }
59+ buf = buf [:n ]
60+
61+ if setting .LFS .StartServer {
62+ meta := lfs .IsPointerFile (& buf )
63+ if meta != nil {
64+ meta , err = repo .GetLFSMetaObjectByOid (meta .Oid )
65+ if err != nil && err != models .ErrLFSObjectNotExist {
66+ // return default
67+ return "UTF-8" , false
68+ }
69+ }
70+ if meta != nil {
71+ dataRc , err := lfs .ReadMetaObject (meta )
72+ if err != nil {
73+ // return default
74+ return "UTF-8" , false
75+ }
76+ defer dataRc .Close ()
77+ buf = make ([]byte , 1024 )
78+ n , err = dataRc .Read (buf )
79+ if err != nil {
80+ // return default
81+ return "UTF-8" , false
82+ }
83+ buf = buf [:n ]
84+ }
85+
86+ }
87+
88+ encoding , err := base .DetectEncoding (buf )
89+ if err != nil {
90+ // just default to utf-8 and no bom
91+ return "UTF-8" , false
92+ }
93+ if encoding == "UTF-8" {
94+ return encoding , bytes .Equal (buf [0 :3 ], base .UTF8BOM )
95+ }
96+ charsetEncoding , _ := charset .Lookup (encoding )
97+ if charsetEncoding == nil {
98+ return "UTF-8" , false
99+ }
100+
101+ result , n , err := transform .String (charsetEncoding .NewDecoder (), string (buf ))
102+
103+ if n > 2 {
104+ return encoding , bytes .Equal ([]byte (result )[0 :3 ], base .UTF8BOM )
105+ }
106+
107+ return encoding , false
108+ }
109+
40110// CreateOrUpdateRepoFile adds or updates a file in the given repository
41111func CreateOrUpdateRepoFile (repo * models.Repository , doer * models.User , opts * UpdateRepoFileOptions ) (* gitea.FileResponse , error ) {
42112 // If no branch name is set, assume master
@@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
118188 opts .LastCommitID = commit .ID .String ()
119189 }
120190
191+ encoding := "UTF-8"
192+ bom := false
193+
121194 if ! opts .IsNewFile {
122195 fromEntry , err := commit .GetTreeEntryByPath (fromTreePath )
123196 if err != nil {
@@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
151224 // haven't been made. We throw an error if one wasn't provided.
152225 return nil , models.ErrSHAOrCommitIDNotProvided {}
153226 }
227+ encoding , bom = detectEncodingAndBOM (fromEntry , repo )
154228 }
155229
156230 // For the path where this file will be created/updated, we need to make
@@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
235309 }
236310
237311 content := opts .Content
312+ if bom {
313+ content = string (base .UTF8BOM ) + content
314+ }
315+ if encoding != "UTF-8" {
316+ charsetEncoding , _ := charset .Lookup (encoding )
317+ if charsetEncoding != nil {
318+ result , _ , err := transform .String (charsetEncoding .NewEncoder (), string (content ))
319+ if err != nil {
320+ // Look if we can't encode back in to the original we should just stick with utf-8
321+ log .Error ("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v" , opts .TreePath , opts .FromTreePath , encoding , err )
322+ result = content
323+ }
324+ content = result
325+ } else {
326+ log .Error ("Unknown encoding: %s" , encoding )
327+ }
328+ }
329+ // Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
330+ opts .Content = content
238331 var lfsMetaObject * models.LFSMetaObject
239332
240- if filename2attribute2info [treePath ] != nil && filename2attribute2info [treePath ]["filter" ] == "lfs" {
333+ if setting . LFS . StartServer && filename2attribute2info [treePath ] != nil && filename2attribute2info [treePath ]["filter" ] == "lfs" {
241334 // OK so we are supposed to LFS this data!
242335 oid , err := models .GenerateLFSOid (strings .NewReader (opts .Content ))
243336 if err != nil {
0 commit comments