Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TA] Add encoding support and length and offset #14719

Merged
merged 3 commits into from
Sep 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- It defaults to the latest supported API version, which currently is `3.1-preview.2`.
- `ErrorCode` value returned from the service is now surfaced in `RequestFailedException`.
- Support added for Opinion Mining. This feature is available in the Text Analytics service v3.1-preview.1 and above.
- Added `Offset` and `Length` properties for `CategorizedEntity`, `SentenceSentiment`, and `LinkedEntityMatch`. The default encoding is UTF-16 code units. For additional information, see https://aka.ms/text-analytics-offsets.
- `TextAnalyticsError` and `TextAnalyticsWarning` are now marked as immutable.

## 5.0.0 (2020-07-27)
Expand Down
9 changes: 6 additions & 3 deletions sdk/textanalytics/Azure.AI.TextAnalytics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ CategorizedEntityCollection entities = client.RecognizeEntities(document);
Console.WriteLine($"Recognized {entities.Count} entities:");
foreach (CategorizedEntity entity in entities)
{
Console.WriteLine($"Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
Console.WriteLine($"Text: {entity.Text}, Offset (in UTF-16 code units): {entity.Offset}, Length (in UTF-16 code units): {entity.Length}");
Console.WriteLine($"Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
}
```
For samples on using the production-recommended option `RecognizeEntitiesBatch`, see [here][recognize_entities_sample].
Expand All @@ -211,7 +212,8 @@ foreach (LinkedEntity linkedEntity in linkedEntities)
Console.WriteLine($"Name: {linkedEntity.Name}, Language: {linkedEntity.Language}, Data Source: {linkedEntity.DataSource}, Url: {linkedEntity.Url.ToString()}, Entity Id in Data Source: {linkedEntity.DataSourceEntityId}");
foreach (LinkedEntityMatch match in linkedEntity.Matches)
{
Console.WriteLine($" Match Text: {match.Text}, Confidence score: {match.ConfidenceScore}");
Console.WriteLine($" Match Text: {match.Text}, Offset (in UTF-16 code units): {match.Offset}, Length (in UTF-16 code units): {match.Length}");
Console.WriteLine($" Confidence score: {match.ConfidenceScore}");
}
}
```
Expand Down Expand Up @@ -241,7 +243,8 @@ CategorizedEntityCollection entities = await client.RecognizeEntitiesAsync(docum
Console.WriteLine($"Recognized {entities.Count} entities:");
foreach (CategorizedEntity entity in entities)
{
Console.WriteLine($"Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
Console.WriteLine($"Text: {entity.Text}, Offset (in UTF-16 code units): {entity.Offset}, Length (in UTF-16 code units): {entity.Length}");
Console.WriteLine($"Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
}
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ public readonly partial struct AspectSentiment
private readonly object _dummy;
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.SentimentConfidenceScores ConfidenceScores { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
maririos marked this conversation as resolved.
Show resolved Hide resolved
public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } }
public string Text { get { throw null; } }
}
Expand All @@ -32,6 +34,8 @@ public readonly partial struct CategorizedEntity
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.EntityCategory Category { get { throw null; } }
public double ConfidenceScore { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
public string SubCategory { get { throw null; } }
public string Text { get { throw null; } }
}
Expand Down Expand Up @@ -144,6 +148,8 @@ public readonly partial struct LinkedEntityMatch
private readonly object _dummy;
private readonly int _dummyPrimitive;
public double ConfidenceScore { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
public string Text { get { throw null; } }
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
Expand All @@ -161,6 +167,8 @@ public readonly partial struct OpinionSentiment
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.SentimentConfidenceScores ConfidenceScores { get { throw null; } }
public bool IsNegated { get { throw null; } }
public int Length { get { throw null; } }
public int Offset { get { throw null; } }
public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } }
public string Text { get { throw null; } }
}
Expand Down Expand Up @@ -192,7 +200,9 @@ public readonly partial struct SentenceSentiment
private readonly object _dummy;
private readonly int _dummyPrimitive;
public Azure.AI.TextAnalytics.SentimentConfidenceScores ConfidenceScores { get { throw null; } }
public int Length { get { throw null; } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.MinedOpinion> MinedOpinions { get { throw null; } }
public int Offset { get { throw null; } }
public Azure.AI.TextAnalytics.TextSentiment Sentiment { get { throw null; } }
public string Text { get { throw null; } }
}
Expand Down Expand Up @@ -319,8 +329,10 @@ public static partial class TextAnalyticsModelFactory
public static Azure.AI.TextAnalytics.AnalyzeSentimentResult AnalyzeSentimentResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.AnalyzeSentimentResult AnalyzeSentimentResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.DocumentSentiment documentSentiment) { throw null; }
public static Azure.AI.TextAnalytics.AnalyzeSentimentResultCollection AnalyzeSentimentResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.AnalyzeSentimentResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
public static Azure.AI.TextAnalytics.AspectSentiment AspectSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double negativeScore) { throw null; }
public static Azure.AI.TextAnalytics.AspectSentiment AspectSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double negativeScore, int offset, int length) { throw null; }
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public static Azure.AI.TextAnalytics.CategorizedEntity CategorizedEntity(string text, string category, string subCategory, double score) { throw null; }
public static Azure.AI.TextAnalytics.CategorizedEntity CategorizedEntity(string text, string category, string subCategory, double score, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.CategorizedEntityCollection CategorizedEntityCollection(System.Collections.Generic.IList<Azure.AI.TextAnalytics.CategorizedEntity> entities, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
public static Azure.AI.TextAnalytics.DetectedLanguage DetectedLanguage(string name, string iso6391Name, double confidenceScore, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
public static Azure.AI.TextAnalytics.DetectLanguageResult DetectLanguageResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
Expand All @@ -333,9 +345,11 @@ public static partial class TextAnalyticsModelFactory
public static Azure.AI.TextAnalytics.KeyPhraseCollection KeyPhraseCollection(System.Collections.Generic.IList<string> keyPhrases, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
public static Azure.AI.TextAnalytics.LinkedEntity LinkedEntity(string name, string dataSourceEntityId, string language, string dataSource, System.Uri url, System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.LinkedEntityMatch> matches) { throw null; }
public static Azure.AI.TextAnalytics.LinkedEntityCollection LinkedEntityCollection(System.Collections.Generic.IList<Azure.AI.TextAnalytics.LinkedEntity> entities, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public static Azure.AI.TextAnalytics.LinkedEntityMatch LinkedEntityMatch(string text, double score) { throw null; }
public static Azure.AI.TextAnalytics.LinkedEntityMatch LinkedEntityMatch(string text, double score, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.MinedOpinion MinedOpinion(Azure.AI.TextAnalytics.AspectSentiment aspect, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.OpinionSentiment> opinions) { throw null; }
public static Azure.AI.TextAnalytics.OpinionSentiment OpinionSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated) { throw null; }
public static Azure.AI.TextAnalytics.OpinionSentiment OpinionSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated, int offset, int length) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResult RecognizeEntitiesResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.CategorizedEntityCollection entities) { throw null; }
public static Azure.AI.TextAnalytics.RecognizeEntitiesResultCollection RecognizeEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
Expand All @@ -344,7 +358,7 @@ public static partial class TextAnalyticsModelFactory
public static Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResultCollection RecognizeLinkedEntitiesResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesResult> list, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string modelVersion) { throw null; }
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore) { throw null; }
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.MinedOpinion> minedOpinions) { throw null; }
public static Azure.AI.TextAnalytics.SentenceSentiment SentenceSentiment(Azure.AI.TextAnalytics.TextSentiment sentiment, string text, double positiveScore, double neutralScore, double negativeScore, int offset, int length, System.Collections.Generic.IReadOnlyList<Azure.AI.TextAnalytics.MinedOpinion> minedOpinions) { throw null; }
public static Azure.AI.TextAnalytics.SentimentConfidenceScores SentimentConfidenceScores(double positiveScore, double neutralScore, double negativeScore) { throw null; }
public static Azure.AI.TextAnalytics.TextAnalyticsError TextAnalyticsError(string code, string message, string target = null) { throw null; }
public static Azure.AI.TextAnalytics.TextAnalyticsWarning TextAnalyticsWarning(string code, string message) { throw null; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ CategorizedEntityCollection entities = client.RecognizeEntities(document);
Console.WriteLine($"Recognized {entities.Count} entities:");
foreach (CategorizedEntity entity in entities)
{
Console.WriteLine($"Text: {entity.Text}, Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
Console.WriteLine($"Text: {entity.Text}, Offset (in UTF-16 code units): {entity.Offset}, Length (in UTF-16 code units): {entity.Length}");
Console.WriteLine($"Category: {entity.Category}, SubCategory: {entity.SubCategory}, Confidence score: {entity.ConfidenceScore}");
}
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ foreach (LinkedEntity linkedEntity in linkedEntities)
Console.WriteLine($"Name: {linkedEntity.Name}, Language: {linkedEntity.Language}, Data Source: {linkedEntity.DataSource}, Url: {linkedEntity.Url.ToString()}, Entity Id in Data Source: {linkedEntity.DataSourceEntityId}");
foreach (LinkedEntityMatch match in linkedEntity.Matches)
{
Console.WriteLine($" Match Text: {match.Text}, Confidence score: {match.ConfidenceScore}");
Console.WriteLine($" Match Text: {match.Text}, Offset (in UTF-16 code units): {match.Offset}, Length (in UTF-16 code units): {match.Length}");
Console.WriteLine($" Confidence score: {match.ConfidenceScore}");
}
}
```
Expand Down
16 changes: 15 additions & 1 deletion sdk/textanalytics/Azure.AI.TextAnalytics/src/AspectSentiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ public readonly struct AspectSentiment
{
private const double _neutralValue = 0d;

internal AspectSentiment(TextSentiment sentiment, string text, double positiveScore, double negativeScore)
/// <summary>
/// Initializes a new <see cref="AspectSentiment"/> from explicitly supplied values.
/// The caller does not provide a neutral confidence score; that slot is always
/// filled with the <c>_neutralValue</c> constant.
/// </summary>
/// <param name="sentiment">Value assigned to <see cref="Sentiment"/>.</param>
/// <param name="text">Value assigned to <see cref="Text"/>.</param>
/// <param name="positiveScore">Positive score forwarded to the <see cref="SentimentConfidenceScores"/> constructor.</param>
/// <param name="negativeScore">Negative score forwarded to the <see cref="SentimentConfidenceScores"/> constructor.</param>
/// <param name="offset">Value assigned to <see cref="Offset"/>.</param>
/// <param name="length">Value assigned to <see cref="Length"/>.</param>
internal AspectSentiment(TextSentiment sentiment, string text, double positiveScore, double negativeScore, int offset, int length)
{
Sentiment = sentiment;
Text = text;
// Only positive and negative scores are accepted here; the neutral argument is the fixed constant.
ConfidenceScores = new SentimentConfidenceScores(positiveScore, _neutralValue, negativeScore);
Offset = offset;
Length = length;
}

internal AspectSentiment(SentenceAspect sentenceAspect)
Expand All @@ -31,6 +33,8 @@ internal AspectSentiment(SentenceAspect sentenceAspect)
Text = sentenceAspect.Text;
ConfidenceScores = new SentimentConfidenceScores(sentenceAspect.ConfidenceScores.Positive, _neutralValue, sentenceAspect.ConfidenceScores.Negative);
Sentiment = (TextSentiment)Enum.Parse(typeof(TextSentiment), sentenceAspect.Sentiment, ignoreCase: true);
Offset = sentenceAspect.Offset;
Length = sentenceAspect.Length;
}

/// <summary>
Expand All @@ -50,5 +54,15 @@ internal AspectSentiment(SentenceAspect sentenceAspect)
/// Higher values signify higher confidence.
/// </summary>
public SentimentConfidenceScores ConfidenceScores { get; }

/// <summary>
/// Gets the starting position (in UTF-16 code units) for the aspect text.
/// </summary>
public int Offset { get; }

/// <summary>
/// Gets the length (in UTF-16 code units) of the aspect text.
/// </summary>
public int Length { get; }
}
}
12 changes: 12 additions & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/src/CategorizedEntity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ internal CategorizedEntity(Entity entity)
Text = entity.Text;
SubCategory = entity.Subcategory;
ConfidenceScore = entity.ConfidenceScore;
Offset = entity.Offset;
Length = entity.Length;
}

/// <summary>
Expand Down Expand Up @@ -50,5 +52,15 @@ internal CategorizedEntity(Entity entity)
/// text substring matches this inferred entity.
/// </summary>
public double ConfidenceScore { get; }

/// <summary>
/// Gets the starting position (in UTF-16 code units) for the matching text in the input document.
/// </summary>
public int Offset { get; }

/// <summary>
/// Gets the length (in UTF-16 code units) of the matching text in the input document.
/// </summary>
public int Length { get; }
}
}
15 changes: 10 additions & 5 deletions sdk/textanalytics/Azure.AI.TextAnalytics/src/LinkedEntityMatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,14 @@ internal LinkedEntityMatch(double confidenceScore, string text, int offset, int
/// </summary>
public double ConfidenceScore { get; }

/// <summary> Start position for the entity match text. </summary>
private int Offset { get; }
/// <summary> Length for the entity match text. </summary>
private int Length { get; }
}
/// <summary>
/// Gets the starting position (in UTF-16 code units) for the matching text in the document.
/// </summary>
public int Offset { get; }

/// <summary>
/// Gets the length (in UTF-16 code units) of the matching text in the document.
/// </summary>
public int Length { get; }
}
}
16 changes: 15 additions & 1 deletion sdk/textanalytics/Azure.AI.TextAnalytics/src/OpinionSentiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ public readonly struct OpinionSentiment
{
private const double _neutralValue = 0d;

internal OpinionSentiment(TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated)
/// <summary>
/// Initializes a new <see cref="OpinionSentiment"/> from explicitly supplied values.
/// The caller does not provide a neutral confidence score; that slot is always
/// filled with the <c>_neutralValue</c> constant.
/// </summary>
/// <param name="sentiment">Value assigned to <see cref="Sentiment"/>.</param>
/// <param name="positiveScore">Positive score forwarded to the <see cref="SentimentConfidenceScores"/> constructor.</param>
/// <param name="negativeScore">Negative score forwarded to the <see cref="SentimentConfidenceScores"/> constructor.</param>
/// <param name="text">Value assigned to <see cref="Text"/>.</param>
/// <param name="isNegated">Value assigned to <see cref="IsNegated"/>.</param>
/// <param name="offset">Value assigned to <see cref="Offset"/>.</param>
/// <param name="length">Value assigned to <see cref="Length"/>.</param>
internal OpinionSentiment(TextSentiment sentiment, double positiveScore, double negativeScore, string text, bool isNegated, int offset, int length)
{
Sentiment = sentiment;
// Only positive and negative scores are accepted here; the neutral argument is the fixed constant.
ConfidenceScores = new SentimentConfidenceScores(positiveScore, _neutralValue, negativeScore);
Text = text;
IsNegated = isNegated;
Offset = offset;
Length = length;
}

internal OpinionSentiment(SentenceOpinion opinion)
Expand All @@ -31,6 +33,8 @@ internal OpinionSentiment(SentenceOpinion opinion)
ConfidenceScores = new SentimentConfidenceScores(opinion.ConfidenceScores.Positive, _neutralValue, opinion.ConfidenceScores.Negative);
Sentiment = (TextSentiment)Enum.Parse(typeof(TextSentiment), opinion.Sentiment, ignoreCase: true);
IsNegated = opinion.IsNegated;
Offset = opinion.Offset;
Length = opinion.Length;
}

/// <summary>
Expand All @@ -57,5 +61,15 @@ internal OpinionSentiment(SentenceOpinion opinion)
/// "The food is not good", the opinion "good" is negated.
/// </summary>
public bool IsNegated { get; }

/// <summary>
/// Gets the starting position (in UTF-16 code units) for the opinion text.
/// </summary>
public int Offset { get; }

/// <summary>
/// Gets the length (in UTF-16 code units) of the opinion text.
/// </summary>
public int Length { get; }
}
}
Loading