Skip to content

Commit

Permalink
.Net: issue-10278 : Change ChatPromptParser to enable 0-n text part i…
Browse files Browse the repository at this point in the history
…nstead of single value (#10304)

### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->
See [Issue
#10278](#10278
)

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->
**`ChatPromptParser.cs` :**
Remove method `IsValidChildNodes` and its call in `IsValidChatMessage` :
A chat message is valid as long as it has a role attribute. Messages
with no text child or multiple text children are now valid


**`ChatPromptParserTests` :** 
- Changed 3rd invalid example since the former one is now valid
- Added tests for : Message with multiple text nodes, mixed XML content
and empty XML node
Remark : The expected behavior for mixed XML content is unclear so I
kept it as it was : the content of the message node ends up in a
`TextContent` if and only if the message has no valid text or image
child node.
So for instance, if the prompt has a message that is a mixed XML with
content and a child `image` node, the content would be ignored and the
`ChatMessageContent` object will have only an `ImageContent` item

**Other remark :** 
`ChatMessageContent.Content` property only returns/sets the first
`TextContent` item.
I thought about changing it to : 
- get : return a concatenation of the `TextContent` items separated by
`\n`
- set : set the first `TextContent` element (or add one if there is
none) and remove other `TextContent` items

But its current behavior seems intended (it is even included in some
unit tests) and I felt like such a change would have too much impact
across the code. So I left it as it is. But I think that such a change
could be beneficial.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible : 
I have the same test fails with the unmodified `main` branch. For
instance the test `GettingStarted/Step1_Create_Kernel` fails with
`ConfigurationNotFoundException : Configuration section 'OpenAI' not
found`. I think there are some missing config files
- [x] I didn't break anyone 😄

---------

Co-authored-by: Thomas DUQUENNOY <[email protected]>
Co-authored-by: Mark Wallace <[email protected]>
Co-authored-by: Dmytro Struk <[email protected]>
  • Loading branch information
4 people authored Jan 28, 2025
1 parent 6fbbb44 commit 89cd872
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -112,19 +112,12 @@ private static ChatMessageContent ParseChatNode(PromptNode node)
/// TagName = "message"<br/>
/// Attributes = { "role" : "..." }<br/>
/// optional one or more child nodes <image>...</image><br/>
/// content not null or single child node <text>...</text>
/// optional one or more child nodes <text>...</text>
/// </remarks>
private static bool IsValidChatMessage(PromptNode node)
{
return
node.TagName.Equals(MessageTagName, StringComparison.OrdinalIgnoreCase) &&
node.Attributes.ContainsKey(RoleAttributeName) &&
IsValidChildNodes(node);
}

private static bool IsValidChildNodes(PromptNode node)
{
var textTagsCount = node.ChildNodes.Count(n => n.TagName.Equals(TextTagName, StringComparison.OrdinalIgnoreCase));
return textTagsCount == 1 || (textTagsCount == 0 && node.Content is not null);
node.Attributes.ContainsKey(RoleAttributeName);
}
}
126 changes: 125 additions & 1 deletion dotnet/src/SemanticKernel.UnitTests/Prompt/ChatPromptParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public sealed class ChatPromptParserTests
[Theory]
[InlineData("This is plain prompt")]
[InlineData("<message This is invalid chat prompt>")]
[InlineData("<message role='user'><text>This is invalid</text><text>chat prompt</text></message>")]
[InlineData("<message role='user'><text>This is an invalid chat prompt</message></text>")]
public void ItReturnsNullChatHistoryWhenPromptIsPlainTextOrInvalid(string prompt)
{
// Act
Expand Down Expand Up @@ -148,6 +148,86 @@ public void ItReturnsChatHistoryWithValidDataImageContent()
});
}

[Fact]
public void ItReturnsChatHistoryWithMultipleTextParts()
{
// Arrange
string prompt = GetValidPromptWithMultipleTextParts();

// Act
bool result = ChatPromptParser.TryParse(prompt, out var chatHistory);

// Assert
Assert.True(result);
Assert.NotNull(chatHistory);

Assert.Collection(chatHistory,
c => Assert.Equal("What can I help with?", c.Content),
c =>
{
Assert.Equal("Hello", c.Content);
Assert.Collection(c.Items,
o =>
{
Assert.IsType<TextContent>(o);
Assert.Equal("Hello", ((TextContent)o).Text);
}, o =>
{
Assert.IsType<TextContent>(o);
Assert.Equal("I am user", ((TextContent)o).Text);
});
});
}

[Fact]
public void ItReturnsChatHistoryWithMixedXmlContent()
{
// Arrange
string prompt = GetValidPromptWithMixedXmlContent();

// Act
bool result = ChatPromptParser.TryParse(prompt, out var chatHistory);

// Assert
Assert.True(result);
Assert.NotNull(chatHistory);

Assert.Collection(chatHistory,
c => Assert.Equal("What can I help with?", c.Content),
c =>
{
Assert.Equal("Hi how are you?", c.Content);
Assert.Single(c.Items);
});
}

[Fact]
public void ItReturnsChatHistoryWithEmptyContent()
{
// Arrange
string prompt = GetValidPromptWithEmptyContent();

// Act
bool result = ChatPromptParser.TryParse(prompt, out var chatHistory);

// Assert
Assert.True(result);
Assert.NotNull(chatHistory);

Assert.Collection(chatHistory,
c => Assert.Equal("What can I help with?", c.Content),
c =>
{
Assert.Null(c.Content);
Assert.Empty(c.Items);
},
c =>
{
Assert.Null(c.Content);
Assert.Empty(c.Items);
});
}

[Fact]
public void ItReturnsChatHistoryWithValidContentItemsIncludeCode()
{
Expand Down Expand Up @@ -259,6 +339,50 @@ private static string GetValidPromptWithDataUriImageContent()
""";
}

private static string GetValidPromptWithMultipleTextParts()
{
return
"""
<message role="assistant">What can I help with?</message>
<message role='user'>
<text>Hello</text>
<text>I am user</text>
</message>
""";
}

private static string GetValidPromptWithMixedXmlContent()
{
return
"""
<message role="assistant">What can I help with?</message>
<message role='user'>
This part will be discarded upon parsing
<text>Hi how are you?</text>
This part will also be discarded upon parsing
</message>
""";
}

private static string GetValidPromptWithEmptyContent()
{
return
"""
<message role="assistant">What can I help with?</message>
<message role='user'/>
<message role='user'>
</message>
""";
}

private static string GetValidPromptWithCDataSection()
{
return
Expand Down

0 comments on commit 89cd872

Please sign in to comment.