Skip to content

Commit 5687809

Browse files
authored
web: escape + as %2B in file path templates (#843)
When constructing a shard we specify templates for constructing a URL. Finally those URLs end up going via html/template which has pretty strict escaping rules. This commit makes two changes: URL construction via text/template. We still get the safety benefits later on when finally rendering the output, but given we are constructing URLs it makes more sense to use text/template. Special escaping of + in URLs. I couldn't convince html/template to not break URls containing + in it. So instead we use + escaped to %2B. I tested gerrit, github and sourcegraph with %2B in filenames and they all worked. To do the above I introduced a template function called URLJoinPath which is a wrapper around url.JoinPath with the additional behaviour around + escaping. Test Plan: Did lots of updates in tests. See diff.
1 parent 6501360 commit 5687809

File tree

6 files changed

+191
-31
lines changed

6 files changed

+191
-31
lines changed

gitindex/index.go

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,22 +90,38 @@ func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error {
9090
u.User = nil
9191
}
9292

93+
// helper to generate u.JoinPath as a template
94+
varVersion := ".Version"
95+
varPath := ".Path"
96+
urlJoinPath := func(elem ...string) string {
97+
elem = append([]string{u.String()}, elem...)
98+
var parts []string
99+
for _, e := range elem {
100+
if e == varVersion || e == varPath {
101+
parts = append(parts, e)
102+
} else {
103+
parts = append(parts, strconv.Quote(e))
104+
}
105+
}
106+
return fmt.Sprintf("{{URLJoinPath %s}}", strings.Join(parts, " "))
107+
}
108+
93109
repo.URL = u.String()
94110
switch typ {
95111
case "gitiles":
96112
// eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20
97-
repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
98-
repo.FileURLTemplate = u.String() + "/+/{{.Version}}/{{.Path}}"
113+
repo.CommitURLTemplate = urlJoinPath("+", varVersion)
114+
repo.FileURLTemplate = urlJoinPath("+", varVersion, varPath)
99115
repo.LineFragmentTemplate = "#{{.LineNumber}}"
100116
case "github":
101117
// eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10
102-
repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
103-
repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}"
118+
repo.CommitURLTemplate = urlJoinPath("commit", varVersion)
119+
repo.FileURLTemplate = urlJoinPath("blob", varVersion, varPath)
104120
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
105121
case "cgit":
106122
// http://git.savannah.gnu.org/cgit/lilypond.git/tree/elisp/lilypond-mode.el?h=dev/philh&id=b2ca0fefe3018477aaca23b6f672c7199ba5238e#n100
107-
repo.CommitURLTemplate = u.String() + "/commit/?id={{.Version}}"
108-
repo.FileURLTemplate = u.String() + "/tree/{{.Path}}/?id={{.Version}}"
123+
repo.CommitURLTemplate = urlJoinPath("commit") + "/?id={{.Version}}"
124+
repo.FileURLTemplate = urlJoinPath("tree", varPath) + "/?id={{.Version}}"
109125
repo.LineFragmentTemplate = "#n{{.LineNumber}}"
110126
case "gitweb":
111127
// https://gerrit.libreoffice.org/gitweb?p=online.git;a=blob;f=Makefile.am;h=cfcfd7c36fbae10e269653dc57a9b68c92d4c10b;hb=848145503bf7b98ce4a4aa0a858a0d71dd0dbb26#l10
@@ -115,29 +131,29 @@ func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error {
115131
case "source.bazel.build":
116132
// https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9
117133
// https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9:tools/cpp/BUILD.empty;l=10
118-
repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
119-
repo.FileURLTemplate = u.String() + "/+/{{.Version}}:{{.Path}}"
134+
repo.CommitURLTemplate = u.String() + "/%2B/{{.Version}}"
135+
repo.FileURLTemplate = u.String() + "/%2B/{{.Version}}:{{.Path}}"
120136
repo.LineFragmentTemplate = ";l={{.LineNumber}}"
121137
case "bitbucket-server":
122138
// https://<bitbucketserver-host>/projects/<project>/repos/<repo>/commits/5be7ca73b898bf17a08e607918accfdeafe1e0bc
123139
// https://<bitbucketserver-host>/projects/<project>/repos/<repo>/browse/<file>?at=5be7ca73b898bf17a08e607918accfdeafe1e0bc
124-
repo.CommitURLTemplate = u.String() + "/commits/{{.Version}}"
125-
repo.FileURLTemplate = u.String() + "/{{.Path}}?at={{.Version}}"
140+
repo.CommitURLTemplate = urlJoinPath("commits", varVersion)
141+
repo.FileURLTemplate = urlJoinPath(varPath) + "?at={{.Version}}"
126142
repo.LineFragmentTemplate = "#{{.LineNumber}}"
127143
case "gitlab":
128144
// https://gitlab.com/gitlab-org/omnibus-gitlab/-/commit/b152c864303dae0e55377a1e2c53c9592380ffed
129145
// https://gitlab.com/gitlab-org/omnibus-gitlab/-/blob/aad04155b3f6fc50ede88aedaee7fc624d481149/files/gitlab-config-template/gitlab.rb.template
130-
repo.CommitURLTemplate = u.String() + "/-/commit/{{.Version}}"
131-
repo.FileURLTemplate = u.String() + "/-/blob/{{.Version}}/{{.Path}}"
146+
repo.CommitURLTemplate = urlJoinPath("-/commit", varVersion)
147+
repo.FileURLTemplate = urlJoinPath("-/blob", varVersion, varPath)
132148
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
133149
case "gitea":
134-
repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
150+
repo.CommitURLTemplate = urlJoinPath("commit", varVersion)
135151
// NOTE The `display=source` query parameter is required to disable file rendering.
136152
// Since line numbers are disabled in rendered files, you wouldn't be able to jump to
137153
// a line without `display=source`. This is supported since gitea 1.17.0.
138154
// When /src/{{.Version}} is used it will redirect to /src/commit/{{.Version}},
139155
// but the query parameters are obmitted.
140-
repo.FileURLTemplate = u.String() + "/src/commit/{{.Version}}/{{.Path}}?display=source"
156+
repo.FileURLTemplate = urlJoinPath("src/commit", varVersion, varPath) + "?display=source"
141157
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
142158
default:
143159
return fmt.Errorf("URL scheme type %q unknown", typ)

gitindex/index_test.go

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ import (
1818
"bytes"
1919
"context"
2020
"fmt"
21+
"net/url"
2122
"os"
2223
"os/exec"
2324
"path/filepath"
2425
"runtime"
2526
"sort"
27+
"strings"
2628
"testing"
2729

2830
"github.com/go-git/go-git/v5"
@@ -770,7 +772,7 @@ func runScript(t *testing.T, cwd string, script string) {
770772
}
771773
}
772774

773-
func TestSetTemplate(t *testing.T) {
775+
func TestSetTemplates_e2e(t *testing.T) {
774776
repositoryDir := t.TempDir()
775777

776778
// setup: initialize the repository and all of its branches
@@ -781,11 +783,100 @@ func TestSetTemplate(t *testing.T) {
781783
t.Fatalf("setTemplatesFromConfig: %v", err)
782784
}
783785

784-
if got, want := desc.FileURLTemplate, "https://github.com/sourcegraph/zoekt/blob/{{.Version}}/{{.Path}}"; got != want {
786+
if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want {
785787
t.Errorf("got %q, want %q", got, want)
786788
}
787789
}
788790

791+
func TestSetTemplates(t *testing.T) {
792+
base := "https://example.com/repo/name"
793+
version := "VERSION"
794+
path := "dir/name.txt"
795+
lineNumber := 10
796+
cases := []struct {
797+
typ string
798+
commit string
799+
file string
800+
line string
801+
}{{
802+
typ: "gitiles",
803+
commit: "https://example.com/repo/name/%2B/VERSION",
804+
file: "https://example.com/repo/name/%2B/VERSION/dir/name.txt",
805+
line: "#10",
806+
}, {
807+
typ: "github",
808+
commit: "https://example.com/repo/name/commit/VERSION",
809+
file: "https://example.com/repo/name/blob/VERSION/dir/name.txt",
810+
line: "#L10",
811+
}, {
812+
typ: "cgit",
813+
commit: "https://example.com/repo/name/commit/?id=VERSION",
814+
file: "https://example.com/repo/name/tree/dir/name.txt/?id=VERSION",
815+
line: "#n10",
816+
}, {
817+
typ: "gitweb",
818+
commit: "https://example.com/repo/name;a=commit;h=VERSION",
819+
file: "https://example.com/repo/name;a=blob;f=dir/name.txt;hb=VERSION",
820+
line: "#l10",
821+
}, {
822+
typ: "source.bazel.build",
823+
commit: "https://example.com/repo/name/%2B/VERSION",
824+
file: "https://example.com/repo/name/%2B/VERSION:dir/name.txt",
825+
line: ";l=10",
826+
}, {
827+
typ: "bitbucket-server",
828+
commit: "https://example.com/repo/name/commits/VERSION",
829+
file: "https://example.com/repo/name/dir/name.txt?at=VERSION",
830+
line: "#10",
831+
}, {
832+
typ: "gitlab",
833+
commit: "https://example.com/repo/name/-/commit/VERSION",
834+
file: "https://example.com/repo/name/-/blob/VERSION/dir/name.txt",
835+
line: "#L10",
836+
}, {
837+
typ: "gitea",
838+
commit: "https://example.com/repo/name/commit/VERSION",
839+
file: "https://example.com/repo/name/src/commit/VERSION/dir/name.txt?display=source",
840+
line: "#L10",
841+
}}
842+
843+
for _, tc := range cases {
844+
t.Run(tc.typ, func(t *testing.T) {
845+
assertOutput := func(templateText string, want string) {
846+
t.Helper()
847+
848+
tt, err := zoekt.ParseTemplate(templateText)
849+
if err != nil {
850+
t.Fatal(err)
851+
}
852+
853+
var sb strings.Builder
854+
err = tt.Execute(&sb, map[string]any{
855+
"Version": version,
856+
"Path": path,
857+
"LineNumber": lineNumber,
858+
})
859+
if err != nil {
860+
t.Fatal(err)
861+
}
862+
if got := sb.String(); got != want {
863+
t.Fatalf("want: %q\ngot: %q", want, got)
864+
}
865+
}
866+
867+
var repo zoekt.Repository
868+
u, _ := url.Parse(base)
869+
err := setTemplates(&repo, u, tc.typ)
870+
if err != nil {
871+
t.Fatal(err)
872+
}
873+
assertOutput(repo.CommitURLTemplate, tc.commit)
874+
assertOutput(repo.FileURLTemplate, tc.file)
875+
assertOutput(repo.LineFragmentTemplate, tc.line)
876+
})
877+
}
878+
}
879+
789880
func BenchmarkPrepareNormalBuild(b *testing.B) {
790881
// NOTE: To run the benchmark, download a large repo (like github.com/chromium/chromium/) and change this to its path.
791882
repoDir := "/path/to/your/repo"

indexbuilder.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ import (
1919
"encoding/binary"
2020
"fmt"
2121
"hash/crc64"
22-
"html/template"
2322
"log"
23+
"net/url"
2424
"os"
2525
"path/filepath"
2626
"sort"
27+
"strings"
28+
"text/template"
2729
"time"
2830
"unicode/utf8"
2931

@@ -216,13 +218,43 @@ type IndexBuilder struct {
216218

217219
func (d *Repository) verify() error {
218220
for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} {
219-
if _, err := template.New("").Parse(t); err != nil {
221+
if _, err := ParseTemplate(t); err != nil {
220222
return err
221223
}
222224
}
223225
return nil
224226
}
225227

228+
func urlJoinPath(base string, elem ...string) string {
229+
// golangs html/template always escapes "+" appearing in an HTML attribute
230+
// [1]. We may even want to treat more characters, differently but this
231+
// atleast makes it possible to visit URLs like [2].
232+
//
233+
// We only do this to elem since base will normally be a hardcoded string.
234+
//
235+
// [1]: https://sourcegraph.com/github.com/golang/go@go1.23.2/-/blob/src/html/template/html.go?L71-80
236+
// [2]: https://github.com/apple/swift-system/blob/main/Sources/System/Util+StringArray.swift
237+
elem = append([]string{}, elem...) // copy to mutate
238+
for i := range elem {
239+
elem[i] = strings.ReplaceAll(elem[i], "+", "%2B")
240+
}
241+
u, err := url.JoinPath(base, elem...)
242+
if err != nil {
243+
return "#!error: " + err.Error()
244+
}
245+
return u
246+
}
247+
248+
// ParseTemplate will parse the templates for FileURLTemplate,
249+
// LineFragmentTemplate and CommitURLTemplate.
250+
//
251+
// It makes available the extra function UrlJoinPath.
252+
func ParseTemplate(text string) (*template.Template, error) {
253+
return template.New("").Funcs(template.FuncMap{
254+
"URLJoinPath": urlJoinPath,
255+
}).Parse(text)
256+
}
257+
226258
// ContentSize returns the number of content bytes so far ingested.
227259
func (b *IndexBuilder) ContentSize() uint32 {
228260
// Add the name too so we don't skip building index if we have

web/e2e_test.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,17 @@ func TestBasic(t *testing.T) {
8686
b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
8787
Name: "name",
8888
URL: "repo-url",
89-
CommitURLTemplate: "{{.Version}}",
90-
FileURLTemplate: "file-url",
91-
LineFragmentTemplate: "#line",
89+
CommitURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/commit/" .Version}}`,
90+
FileURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/blob/" .Version .Path}}`,
91+
LineFragmentTemplate: "#L{{.LineNumber}}",
9292
Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}},
9393
})
9494
if err != nil {
9595
t.Fatalf("NewIndexBuilder: %v", err)
9696
}
9797
if err := b.Add(zoekt.Document{
98-
Name: "f2",
98+
// use a name which requires correct escaping. https://github.com/sourcegraph/zoekt/issues/807
99+
Name: "foo/bar+baz",
99100
Content: []byte("to carry water in the no later bla"),
100101
// --------------0123456789012345678901234567890123
101102
// --------------0 1 2 3
@@ -123,15 +124,15 @@ func TestBasic(t *testing.T) {
123124
for req, needles := range map[string][]string{
124125
"/": {"from 1 repositories"},
125126
"/search?q=water": {
126-
"href=\"file-url#line",
127+
`href="https://github.com/org/repo/blob/1234/foo/bar%2Bbaz"`,
127128
"carry <b>water</b>",
128129
},
129130
"/search?q=r:": {
130131
"1234\">master",
131132
"Found 1 repositories",
132133
nowStr,
133134
"repo-url\">name",
134-
"1 files (36B)",
135+
"1 files (45B)",
135136
},
136137
"/search?q=magic": {
137138
`value=magic`,

web/server.go

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"strconv"
2929
"strings"
3030
"sync"
31+
texttemplate "text/template"
3132
"time"
3233

3334
"github.com/grafana/regexp"
@@ -116,8 +117,9 @@ type Server struct {
116117

117118
startTime time.Time
118119

119-
templateMu sync.Mutex
120-
templateCache map[string]*template.Template
120+
templateMu sync.Mutex
121+
templateCache map[string]*template.Template
122+
textTemplateCache map[string]*texttemplate.Template
121123

122124
lastStatsMu sync.Mutex
123125
lastStats *zoekt.RepoStats
@@ -134,13 +136,30 @@ func (s *Server) getTemplate(str string) *template.Template {
134136

135137
t, err := template.New("cache").Parse(str)
136138
if err != nil {
137-
log.Printf("template parse error: %v", err)
139+
log.Printf("html template parse error: %v", err)
138140
t = template.Must(template.New("empty").Parse(""))
139141
}
140142
s.templateCache[str] = t
141143
return t
142144
}
143145

146+
func (s *Server) getTextTemplate(str string) *texttemplate.Template {
147+
s.templateMu.Lock()
148+
defer s.templateMu.Unlock()
149+
t := s.textTemplateCache[str]
150+
if t != nil {
151+
return t
152+
}
153+
154+
t, err := zoekt.ParseTemplate(str)
155+
if err != nil {
156+
log.Printf("text template parse error: %v", err)
157+
t = texttemplate.Must(texttemplate.New("empty").Parse(""))
158+
}
159+
s.textTemplateCache[str] = t
160+
return t
161+
}
162+
144163
func NewMux(s *Server) (*http.ServeMux, error) {
145164
s.print = s.Top.Lookup("print")
146165
if s.print == nil {
@@ -162,6 +181,7 @@ func NewMux(s *Server) (*http.ServeMux, error) {
162181
}
163182

164183
s.templateCache = map[string]*template.Template{}
184+
s.textTemplateCache = map[string]*texttemplate.Template{}
165185
s.startTime = time.Now()
166186

167187
mux := http.NewServeMux()
@@ -510,7 +530,7 @@ func (s *Server) serveListReposErr(q query.Q, qStr string, r *http.Request) (*Re
510530
}
511531

512532
for _, r := range repos.Repos {
513-
t := s.getTemplate(r.Repository.CommitURLTemplate)
533+
t := s.getTextTemplate(r.Repository.CommitURLTemplate)
514534

515535
repo := Repository{
516536
Name: r.Repository.Name,

0 commit comments

Comments
 (0)