From 90fea0736d5ed08401e1b9c3969540d4eed5a89c Mon Sep 17 00:00:00 2001 From: Dhruv Maradiya Date: Wed, 25 Dec 2024 16:50:01 +0530 Subject: [PATCH 1/5] Implement handling of <br>
elements in DOM parsing to insert line breaks in text --- pkgs/html/lib/dom_parsing.dart | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkgs/html/lib/dom_parsing.dart b/pkgs/html/lib/dom_parsing.dart index 69b0bbd4d..540176878 100644 --- a/pkgs/html/lib/dom_parsing.dart +++ b/pkgs/html/lib/dom_parsing.dart @@ -43,7 +43,14 @@ class TreeVisitor { void visitText(Text node) => visitNodeFallback(node); // TODO(jmesserly): visit attributes. - void visitElement(Element node) => visitNodeFallback(node); + void visitElement(Element node) { + if (node.localName == 'br') { + visitText(Text('\n')); + return; + } + + visitNodeFallback(node); + } void visitComment(Comment node) => visitNodeFallback(node); From b6126c1717a73280a998fb92a4a28261aa8207ac Mon Sep 17 00:00:00 2001 From: Dhruv Maradiya Date: Wed, 25 Dec 2024 16:51:12 +0530 Subject: [PATCH 2/5] update tests to handle
<br> elements for line breaks --- pkgs/html/test/parser_feature_test.dart | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkgs/html/test/parser_feature_test.dart b/pkgs/html/test/parser_feature_test.dart index 7156146e0..d21ad375d 100644 --- a/pkgs/html/test/parser_feature_test.dart +++ b/pkgs/html/test/parser_feature_test.dart @@ -265,11 +265,11 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. }); test('Element.text', () { - final doc = parseFragment('
foo
bar
baz
'); + final doc = parseFragment('
foo

bar
baz
'); final e = doc.firstChild!; final text = e.firstChild!; expect((text as Text).data, 'foo'); - expect(e.text, 'foobarbaz'); + expect(e.text, 'foo\nbarbaz\n'); e.text = 'FOO'; expect(e.nodes.length, 1); @@ -279,7 +279,7 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. }); test('Text.text', () { - final doc = parseFragment('
foo
bar
baz
'); + final doc = parseFragment('
foo
bar

baz
'); final e = doc.firstChild!; final text = e.firstChild as Text; expect(text.data, 'foo'); @@ -287,7 +287,7 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. text.text = 'FOO'; expect(text.data, 'FOO'); - expect(e.text, 'FOObarbaz'); + expect(e.text, 'FOObar\nbaz'); expect(text.text, 'FOO'); }); From 9beee028b677960567e5cd320fe58819038c5bec Mon Sep 17 00:00:00 2001 From: Dhruv Maradiya Date: Wed, 25 Dec 2024 17:52:47 +0530 Subject: [PATCH 3/5] update changelog to include fix for
<br> newline handling --- pkgs/html/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkgs/html/CHANGELOG.md b/pkgs/html/CHANGELOG.md index 9a881e9d7..86d702583 100644 --- a/pkgs/html/CHANGELOG.md +++ b/pkgs/html/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.15.5+1 + +- Fix a bug in DOM parsing where `<br>` tags do not create a new line when HTML is converted to text. + ## 0.15.5 - Require Dart `3.2`. From 2b721eabd85051613abd0a8465c0608a58c68a0f Mon Sep 17 00:00:00 2001 From: Dhruv Maradiya Date: Mon, 30 Dec 2024 10:51:25 +0530 Subject: [PATCH 4/5] bump version to 0.15.5+1 in html package --- pkgs/html/pubspec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/html/pubspec.yaml b/pkgs/html/pubspec.yaml index 447b98e18..7508588ad 100644 --- a/pkgs/html/pubspec.yaml +++ b/pkgs/html/pubspec.yaml @@ -1,5 +1,5 @@ name: html -version: 0.15.5 +version: 0.15.5+1 description: APIs for parsing and manipulating HTML content outside the browser. repository: https://github.com/dart-lang/tools/tree/main/pkgs/html issue_tracker: https://github.com/dart-lang/tools/issues?q=is%3Aissue+is%3Aopen+label%3Apackage%3Ahtml From aafc3f92232e211080a3f5c790a871751633d845 Mon Sep 17 00:00:00 2001 From: Dhruv Maradiya Date: Sun, 19 Oct 2025 13:35:21 +0530 Subject: [PATCH 5/5] feat: add spec-compliant textContent with BR conversion flag Implements DOM spec textContent algorithm with optional convertBRsToNewlines parameter. Adds isElementBr() helper for namespace-aware BR detection. Maintains backward compatibility with existing .text getter. --- pkgs/html/lib/dom.dart | 47 +++++++++++++++++++++++-- pkgs/html/lib/dom_parsing.dart | 9 +---- pkgs/html/test/parser_feature_test.dart | 8 +++-- 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/pkgs/html/lib/dom.dart b/pkgs/html/lib/dom.dart index 0c6b38e58..c3212db8b 100644 --- a/pkgs/html/lib/dom.dart +++ b/pkgs/html/lib/dom.dart @@ -214,6 +214,9 @@ abstract class Node { } // Implemented per: http://dom.spec.whatwg.org/#dom-node-textcontent + String? textContent({bool convertBRsToNewlines = false}) => + _getTextContent(this, convertBRsToNewlines: convertBRsToNewlines); + String? get text => null; set text(String?
value) {} @@ -1099,8 +1102,36 @@ class FilteredElementList extends IterableBase } // http://dom.spec.whatwg.org/#dom-node-textcontent -// For Element and DocumentFragment -String _getText(Node node) => (_ConcatTextVisitor()..visit(node)).toString(); +String? _getTextContent(Node node, {bool convertBRsToNewlines = false}) { + // DocumentFragment or Element: return descendant text content + if (node is DocumentFragment || node is Element) { + return _getText(node, convertBRsToNewlines: convertBRsToNewlines); + } + // CharacterData (Text, Comment): return data + if (node is Text) { + return node.data; + } + if (node is Comment) { + return node.data; + } + // Otherwise: return null + return null; +} + +/// Returns true if the element is an HTML <br>
element. +/// Checks both the local name and namespace to ensure it's a proper HTML br element. +/// Note: null namespace is treated as HTML namespace for elements created by the HTML parser. +bool isElementBr(Element element) { + if (element.localName != 'br') return false; + final ns = element.namespaceUri; + return ns == null || ns == Namespaces.html; +} + +// For Element and DocumentFragment (legacy helper) +String _getText(Node node, {bool convertBRsToNewlines = false}) => + (_ConcatTextVisitor(convertBRsToNewlines: convertBRsToNewlines) + ..visit(node)) + .toString(); void _setText(Node node, String? value) { node.nodes.clear(); @@ -1109,6 +1140,9 @@ void _setText(Node node, String? value) { class _ConcatTextVisitor extends TreeVisitor { final _str = StringBuffer(); + final bool convertBRsToNewlines; + + _ConcatTextVisitor({this.convertBRsToNewlines = false}); @override String toString() => _str.toString(); @@ -1117,4 +1151,13 @@ class _ConcatTextVisitor extends TreeVisitor { void visitText(Text node) { _str.write(node.data); } + + @override + void visitElement(Element node) { + if (convertBRsToNewlines && isElementBr(node)) { + _str.write('\n'); + return; + } + super.visitElement(node); + } } diff --git a/pkgs/html/lib/dom_parsing.dart b/pkgs/html/lib/dom_parsing.dart index 540176878..69b0bbd4d 100644 --- a/pkgs/html/lib/dom_parsing.dart +++ b/pkgs/html/lib/dom_parsing.dart @@ -43,14 +43,7 @@ class TreeVisitor { void visitText(Text node) => visitNodeFallback(node); // TODO(jmesserly): visit attributes. 
- void visitElement(Element node) { - if (node.localName == 'br') { - visitText(Text('\n')); - return; - } - - visitNodeFallback(node); - } + void visitElement(Element node) => visitNodeFallback(node); void visitComment(Comment node) => visitNodeFallback(node); diff --git a/pkgs/html/test/parser_feature_test.dart b/pkgs/html/test/parser_feature_test.dart index d21ad375d..2789afc54 100644 --- a/pkgs/html/test/parser_feature_test.dart +++ b/pkgs/html/test/parser_feature_test.dart @@ -269,7 +269,9 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. final e = doc.firstChild!; final text = e.firstChild!; expect((text as Text).data, 'foo'); - expect(e.text, 'foo\nbarbaz\n'); + expect(e.text, 'foobarbaz'); + expect(e.textContent(convertBRsToNewlines: true), 'foo\nbarbaz\n'); + expect(e.textContent(convertBRsToNewlines: false), 'foobarbaz'); e.text = 'FOO'; expect(e.nodes.length, 1); @@ -287,7 +289,9 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. text.text = 'FOO'; expect(text.data, 'FOO'); - expect(e.text, 'FOObar\nbaz'); + expect(e.text, 'FOObarbaz'); + expect(e.textContent(convertBRsToNewlines: true), 'FOObar\nbaz'); + expect(e.textContent(convertBRsToNewlines: false), 'FOObarbaz'); expect(text.text, 'FOO'); });