Skip to content

Split sanitizer functions and fine-tune some tests #31192

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions modules/markup/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,18 @@ func TestRender_links(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer))
}
// Text that should be turned into URL

defaultCustom := setting.Markdown.CustomURLSchemes
oldCustomURLSchemes := setting.Markdown.CustomURLSchemes
markup.ResetDefaultSanitizerForTesting()
defer func() {
setting.Markdown.CustomURLSchemes = oldCustomURLSchemes
markup.ResetDefaultSanitizerForTesting()
markup.CustomLinkURLSchemes(oldCustomURLSchemes)
}()
setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"}
markup.InitializeSanitizer()
markup.CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)

// Text that should be turned into URL
test(
"https://www.example.com",
`<p><a href="https://www.example.com" rel="nofollow">https://www.example.com</a></p>`)
Expand Down Expand Up @@ -259,11 +264,6 @@ func TestRender_links(t *testing.T) {
test(
"ftps://gitea.com",
`<p>ftps://gitea.com</p>`)

// Restore previous settings
setting.Markdown.CustomURLSchemes = defaultCustom
markup.InitializeSanitizer()
markup.CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
}

func TestRender_email(t *testing.T) {
Expand Down
1 change: 0 additions & 1 deletion modules/markup/renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ func Init(ph *ProcessorHelper) {
DefaultProcessorHelper = *ph
}

NewSanitizer()
if len(setting.Markdown.CustomURLSchemes) > 0 {
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
}
Expand Down
222 changes: 21 additions & 201 deletions modules/markup/sanitizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
package markup

import (
"io"
"net/url"
"regexp"
"sync"

"code.gitea.io/gitea/modules/setting"

"github.com/microcosm-cc/bluemonday"
)

Expand All @@ -21,211 +17,35 @@ type Sanitizer struct {
defaultPolicy *bluemonday.Policy
descriptionPolicy *bluemonday.Policy
rendererPolicies map[string]*bluemonday.Policy
init sync.Once
allowAllRegex *regexp.Regexp
}

var (
sanitizer = &Sanitizer{}
allowAllRegex = regexp.MustCompile(".+")
defaultSanitizer *Sanitizer
defaultSanitizerOnce sync.Once
)

// NewSanitizer initializes sanitizer with allowed attributes based on settings.
// Multiple calls to this function will only create one instance of Sanitizer during
// entire application lifecycle.
func NewSanitizer() {
sanitizer.init.Do(func() {
InitializeSanitizer()
})
}

// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
func InitializeSanitizer() {
sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
sanitizer.defaultPolicy = createDefaultPolicy()
sanitizer.descriptionPolicy = createRepoDescriptionPolicy()

for name, renderer := range renderers {
sanitizerRules := renderer.SanitizerRules()
if len(sanitizerRules) > 0 {
policy := createDefaultPolicy()
addSanitizerRules(policy, sanitizerRules)
sanitizer.rendererPolicies[name] = policy
}
}
}

func createDefaultPolicy() *bluemonday.Policy {
policy := bluemonday.UGCPolicy()

// For JS code copy and Mermaid loading state
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")

// For code preview
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally()
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td")
policy.AllowAttrs("data-line-number").OnElements("span")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div")

// For code preview (unicode escape)
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span")
policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span")

// For color preview
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")

// For attention
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg")
policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
policy.AllowAttrs("fill-rule", "d").OnElements("path")

// For Chroma markdown plugin
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")

// Checkboxes
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")

// Custom URL-Schemes
if len(setting.Markdown.CustomURLSchemes) > 0 {
policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
} else {
policy.AllowURLSchemesMatching(allowAllRegex)

// Even if every scheme is allowed, these three are blocked for security reasons
disallowScheme := func(*url.URL) bool {
return false
func GetDefaultSanitizer() *Sanitizer {
defaultSanitizerOnce.Do(func() {
defaultSanitizer = &Sanitizer{
rendererPolicies: map[string]*bluemonday.Policy{},
allowAllRegex: regexp.MustCompile(".+"),
}
policy.AllowURLSchemeWithCustomPolicy("javascript", disallowScheme)
policy.AllowURLSchemeWithCustomPolicy("vbscript", disallowScheme)
policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme)
}

// Allow classes for anchors
policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")

// Allow classes for task lists
policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")

// Allow classes for org mode list item status.
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li")

// Allow icons
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")

// Allow classes for emojis
policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")

// Allow icons, emojis, chroma syntax and keyword markup on span
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")

// Allow 'color' and 'background-color' properties for the style attribute on text elements.
policy.AllowStyles("color", "background-color").OnElements("span", "p")

// Allow generally safe attributes
generalSafeAttrs := []string{
"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",
"aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
"axis", "border", "cellpadding", "cellspacing", "char",
"charoff", "charset", "checked",
"clear", "cols", "colspan", "color",
"compact", "coords", "datetime", "dir",
"disabled", "enctype", "for", "frame",
"headers", "height", "hreflang",
"hspace", "ismap", "label", "lang",
"maxlength", "media", "method",
"multiple", "name", "nohref", "noshade",
"nowrap", "open", "prompt", "readonly", "rel", "rev",
"rows", "rowspan", "rules", "scope",
"selected", "shape", "size", "span",
"start", "summary", "tabindex", "target",
"title", "type", "usemap", "valign", "value",
"vspace", "width", "itemprop",
}

generalSafeElements := []string{
"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
"dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
"details", "caption", "figure", "figcaption",
"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
}

policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)

policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")

policy.AllowAttrs("itemscope", "itemtype").OnElements("div")

// FIXME: Need to handle longdesc in img but there is no easy way to do it

// Custom keyword markup
addSanitizerRules(policy, setting.ExternalSanitizerRules)

return policy
}

// createRepoDescriptionPolicy returns a minimal more strict policy that is used for
// repository descriptions.
func createRepoDescriptionPolicy() *bluemonday.Policy {
policy := bluemonday.NewPolicy()

// Allow italics and bold.
policy.AllowElements("i", "b", "em", "strong")

// Allow code.
policy.AllowElements("code")

// Allow links
policy.AllowAttrs("href", "target", "rel").OnElements("a")

// Allow classes for emojis
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^emoji$`)).OnElements("img", "span")
policy.AllowAttrs("aria-label").OnElements("span")

return policy
}

func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
for _, rule := range rules {
if rule.AllowDataURIImages {
policy.AllowDataURIImages()
}
if rule.Element != "" {
if rule.Regexp != nil {
policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
} else {
policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
for name, renderer := range renderers {
sanitizerRules := renderer.SanitizerRules()
if len(sanitizerRules) > 0 {
policy := defaultSanitizer.createDefaultPolicy()
defaultSanitizer.addSanitizerRules(policy, sanitizerRules)
defaultSanitizer.rendererPolicies[name] = policy
}
}
}
}

// SanitizeDescription sanitizes the HTML generated for a repository description.
func SanitizeDescription(s string) string {
NewSanitizer()
return sanitizer.descriptionPolicy.Sanitize(s)
}

// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize(s string) string {
NewSanitizer()
return sanitizer.defaultPolicy.Sanitize(s)
defaultSanitizer.defaultPolicy = defaultSanitizer.createDefaultPolicy()
defaultSanitizer.descriptionPolicy = defaultSanitizer.createRepoDescriptionPolicy()
})
return defaultSanitizer
}

// SanitizeReader sanitizes a Reader
func SanitizeReader(r io.Reader, renderer string, w io.Writer) error {
NewSanitizer()
policy, exist := sanitizer.rendererPolicies[renderer]
if !exist {
policy = sanitizer.defaultPolicy
}
return policy.SanitizeReaderToWriter(r, w)
func ResetDefaultSanitizerForTesting() {
defaultSanitizer = nil
defaultSanitizerOnce = sync.Once{}
}
25 changes: 25 additions & 0 deletions modules/markup/sanitizer_custom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package markup

import (
"code.gitea.io/gitea/modules/setting"

"github.com/microcosm-cc/bluemonday"
)

func (st *Sanitizer) addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
for _, rule := range rules {
if rule.AllowDataURIImages {
policy.AllowDataURIImages()
}
if rule.Element != "" {
if rule.Regexp != nil {
policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
} else {
policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
}
}
}
}
Loading