Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions dotnet/src/SemanticKernel.Abstractions/AI/XmlPromptParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Web;
using System.Xml;

Expand Down Expand Up @@ -38,9 +39,14 @@ public static bool TryParse(string prompt, [NotNullWhen(true)] out List<PromptNo
return false;
}

var xmlDocument = new XmlDocument();
var xmlDocument = new XmlDocument()
{
PreserveWhitespace = true
};

try
{

xmlDocument.LoadXml($"<root>{prompt}</root>");
}
catch (XmlException)
Expand Down Expand Up @@ -70,8 +76,17 @@ public static bool TryParse(string prompt, [NotNullWhen(true)] out List<PromptNo
return null;
}

var isCData = node.FirstChild?.Name.Equals("#cdata-section", StringComparison.OrdinalIgnoreCase) ?? false;
var nodeContent = isCData ? node.InnerText.Trim() : node.InnerXml.Trim();
// Since we're preserving whitespace for the contents within each XmlNode, we
// need to skip any whitespace nodes at the front of the children.
var firstNonWhitespaceChild = node.ChildNodes
.Cast<XmlNode>()
.Where(n => n.NodeType != XmlNodeType.Whitespace)
.FirstOrDefault();

var isCData = firstNonWhitespaceChild?.NodeType == XmlNodeType.CDATA;
var nodeContent = isCData
? node.InnerText.Trim()
: node.InnerXml.Trim();

var promptNode = new PromptNode(node.Name)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,55 @@ public void ItReturnsChatHistoryWithValidContentItemsIncludeCData()
""", c.Content));
}

[Fact]
public void ItReturnsChatHistoryWithValidContentItemsIncludeCode()
{
    // Arrange
    // Expected content for a <code> block embedded as plain XML (neither CDATA-wrapped
    // nor HTML-encoded): the prompt's single-quoted role attribute is expected back
    // with double quotes.
    const string expectedPlainXml = """
        <code>
        <message role="system">
        <text>Text content</text>
        </message>
        </code>
        """;

    // Expected content when the <code> block is protected, either by a CDATA section
    // or by HTML encoding: the single quotes are expected to be preserved.
    const string expectedQuotesPreserved = """
        <code>
        <message role='system'>
        <text>Text content</text>
        </message>
        </code>
        """;

    // Expected content for the last message, whose <code> block holds <text> and
    // <image> elements as plain XML.
    const string expectedImageBlock = """
        <code>
        <text>explain image</text>
        <image>
        https://fake-link-to-image/
        </image>
        </code>
        """;

    // Act
    var parsed = ChatPromptParser.TryParse(GetValidPromptWithCodeBlock(), out var history);

    // Assert
    Assert.True(parsed);
    Assert.NotNull(history);

    Assert.Collection(history,
        // First message: plain XML.
        message => Assert.Equal(expectedPlainXml, message.Content),
        // Second message: wrapped in CDATA.
        message => Assert.Equal(expectedQuotesPreserved, message.Content),
        // Third message: HTML-encoded.
        message => Assert.Equal(expectedQuotesPreserved, message.Content),
        // Fourth message: plain XML with nested <text> and <image> elements.
        message => Assert.Equal(expectedImageBlock, message.Content));
}

private static string GetSimpleValidPrompt()
{
return
Expand Down Expand Up @@ -181,4 +230,47 @@ private static string GetValidPromptWithCDataSection()

""";
}

// Builds a prompt made of four <message> elements, each carrying a <code> block:
//   1. as plain nested XML,
//   2. wrapped in a CDATA section,
//   3. HTML-encoded (&lt; / &gt; / &#39;),
//   4. as plain nested XML inside a message whose role attribute uses single quotes.
// The literal deliberately starts and ends with an empty line.
private static string GetValidPromptWithCodeBlock() =>
    """

    <message role="assistant">
    <code>
    <message role='system'>
    <text>Text content</text>
    </message>
    </code>
    </message>

    <message role="assistant">
    <![CDATA[
    <code>
    <message role='system'>
    <text>Text content</text>
    </message>
    </code>
    ]]>
    </message>

    <message role="assistant">
    &lt;code&gt;
    &lt;message role=&#39;system&#39;&gt;
    &lt;text&gt;Text content&lt;/text&gt;
    &lt;/message&gt;
    &lt;/code&gt;
    </message>

    <message role='user'>
    <code>
    <text>explain image</text>
    <image>
    https://fake-link-to-image/
    </image>
    </code>
    </message>

    """;
}