From 93ece90f5ddb2eb44344db78d51479cad3bc10e5 Mon Sep 17 00:00:00 2001 From: Alexis Colin Date: Sun, 20 Oct 2024 22:06:45 +0900 Subject: [PATCH] feat(gnoweb): disable html in markdown (#2964) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In line with our vision for a HTML-free Gnoweb, this PR disables the rendering of HTML within Markdown content. It replaces all elements recognized as HTML tags (e.g., `
<div>`, `<p>
`, `<span>`) with empty strings before sending the content to the front-end. The parsing still happens via JavaScript, but now without any HTML tags. However, HTML tags like ``` `
<p>lorem ipsum</p>
` ``` can still appear within code blocks in Markdown, as usual, but won’t be parsed/read as actual HTML. Additionally, this feature is controlled by the `gnoweb` boolean flag `with-html`, which defaults to `false`. cc @gfanton > [!WARNING] > Enabling this feature will break the design of gno.land realms (and any other realms relying on HTML), since current layout elements like `columns`, `stacks`, or `jumbotrons`... are built with HTML. We will need to adopt the new design system expected with `gnoweb2.0` (or future `gnoweb1` improvements). ### BEFORE: ![127 0 0 1_8888_ (1)](https://github.com/user-attachments/assets/04328db4-7076-4690-9727-50c33f58954d) ### AFTER: ![127 0 0 1_8888_ (2)](https://github.com/user-attachments/assets/b8d9532c-45e6-4a78-b166-2f6d0176bd10) --- ### BEFORE: ![127 0 0 1_8888_r_gnoland_pages](https://github.com/user-attachments/assets/445cbb3f-565b-42af-b794-14f9c682d4ce) ### AFTER: ![127 0 0 1_8888_r_gnoland_pages (1)](https://github.com/user-attachments/assets/ac7131f1-c2d4-42ea-a426-66244782e910) --------- Co-authored-by: Manfred Touron <94029+moul@users.noreply.github.com> --- contribs/gnodev/go.mod | 2 +- gno.land/cmd/gnoweb/main.go | 1 + gno.land/pkg/gnoweb/gnoweb.go | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/contribs/gnodev/go.mod b/contribs/gnodev/go.mod index f4859889a16..c419f968d4a 100644 --- a/contribs/gnodev/go.mod +++ b/contribs/gnodev/go.mod @@ -1,6 +1,6 @@ module github.com/gnolang/gno/contribs/gnodev -go 1.22 +go 1.22.0 replace github.com/gnolang/gno => ../.. 
diff --git a/gno.land/cmd/gnoweb/main.go b/gno.land/cmd/gnoweb/main.go index 547134548ff..5cec7257ebe 100644 --- a/gno.land/cmd/gnoweb/main.go +++ b/gno.land/cmd/gnoweb/main.go @@ -37,6 +37,7 @@ func runMain(args []string) error { fs.StringVar(&cfg.HelpRemote, "help-remote", cfg.HelpRemote, "help page's remote addr") fs.BoolVar(&cfg.WithAnalytics, "with-analytics", cfg.WithAnalytics, "enable privacy-first analytics") fs.StringVar(&bindAddress, "bind", "127.0.0.1:8888", "server listening address") + fs.BoolVar(&cfg.WithHTML, "with-html", cfg.WithHTML, "Enable HTML parsing in markdown rendering") if err := fs.Parse(args); err != nil { return err diff --git a/gno.land/pkg/gnoweb/gnoweb.go b/gno.land/pkg/gnoweb/gnoweb.go index 3e6249cf126..c0bc24ce216 100644 --- a/gno.land/pkg/gnoweb/gnoweb.go +++ b/gno.land/pkg/gnoweb/gnoweb.go @@ -13,6 +13,7 @@ import ( "net/url" "os" "path/filepath" + "regexp" "runtime" "strings" "time" @@ -45,6 +46,7 @@ type Config struct { HelpChainID string HelpRemote string WithAnalytics bool + WithHTML bool } func NewDefaultConfig() Config { @@ -56,6 +58,7 @@ func NewDefaultConfig() Config { HelpChainID: "dev", HelpRemote: "127.0.0.1:26657", WithAnalytics: false, + WithHTML: false, } } @@ -109,6 +112,34 @@ func MakeApp(logger *slog.Logger, cfg Config) gotuna.App { return app } +var ( + inlineCodePattern = regexp.MustCompile("`[^`]*`") + htmlTagPattern = regexp.MustCompile(`<\/?\w+[^>]*?>`) +) + +func sanitizeContent(cfg *Config, content string) string { + if cfg.WithHTML { + return content + } + + placeholders := map[string]string{} + contentWithPlaceholders := inlineCodePattern.ReplaceAllStringFunc(content, func(match string) string { + placeholder := fmt.Sprintf("__GNOMDCODE_%d__", len(placeholders)) + placeholders[placeholder] = match + return placeholder + }) + + sanitizedContent := htmlTagPattern.ReplaceAllString(contentWithPlaceholders, "") + + if len(placeholders) > 0 { + for placeholder, code := range placeholders { + sanitizedContent = 
strings.ReplaceAll(sanitizedContent, placeholder, code) + } + } + + return sanitizedContent +} + // handlerRealmAlias is used to render official pages from realms. // url is intended to be shorter. // UX is intended to be more minimalistic. @@ -151,7 +182,7 @@ func handlerRealmAlias(logger *slog.Logger, app gotuna.App, cfg *Config, rlmpath tmpl.Set("RealmPath", rlmpath) tmpl.Set("Query", querystr) tmpl.Set("PathLinks", pathLinks) - tmpl.Set("Contents", string(res.Data)) + tmpl.Set("Contents", sanitizeContent(cfg, string(res.Data))) tmpl.Set("Config", cfg) tmpl.Set("IsAlias", true) tmpl.Render(w, r, "realm_render.html", "funcs.html") @@ -339,7 +370,7 @@ func handleRealmRender(logger *slog.Logger, app gotuna.App, cfg *Config, w http. tmpl.Set("RealmPath", rlmpath) tmpl.Set("Query", querystr) tmpl.Set("PathLinks", pathLinks) - tmpl.Set("Contents", string(res.Data)) + tmpl.Set("Contents", sanitizeContent(cfg, string(res.Data))) tmpl.Set("Config", cfg) tmpl.Set("HasReadme", hasReadme) tmpl.Render(w, r, "realm_render.html", "funcs.html")