-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[text analytics] opinion mining support #12542
Changes from 11 commits
7755968
43e72dc
a3b4510
32ddecc
ea9fca3
590dd11
399571a
1ef46f9
858e70d
e83b145
ffe563d
09cf3fd
7ee99e0
e9fee50
77c7fa8
87b4a80
83426aa
7dc065c
273fb94
4180ad0
5b804d2
f03b477
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
from azure.core.credentials import AzureKeyCredential | ||
from ._policies import TextAnalyticsResponseHookPolicy | ||
from ._user_agent import USER_AGENT | ||
from ._multiapi import load_generated_api | ||
from ._multiapi import load_generated_api, ApiVersion | ||
|
||
def _authentication_policy(credential): | ||
authentication_policy = None | ||
|
@@ -26,8 +26,8 @@ def _authentication_policy(credential): | |
|
||
class TextAnalyticsClientBase(object): | ||
def __init__(self, endpoint, credential, **kwargs): | ||
api_version = kwargs.pop("api_version", None) | ||
_TextAnalyticsClient = load_generated_api(api_version) | ||
self._api_version = kwargs.pop("api_version", ApiVersion.V3_0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm going to make a separate PR to default to v3.1-preview.1. For this PR, we had issues with |
||
_TextAnalyticsClient = load_generated_api(self._api_version) | ||
self._client = _TextAnalyticsClient( | ||
endpoint=endpoint, | ||
credential=credential, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,11 @@ | |
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
# ------------------------------------ | ||
|
||
from ._generated.v3_0.models._models import LanguageInput | ||
from ._generated.v3_0.models._models import MultiLanguageInput | ||
import re | ||
from ._generated.v3_0.models._models import ( | ||
LanguageInput, | ||
MultiLanguageInput | ||
) | ||
|
||
|
||
class DictMixin(object): | ||
|
@@ -635,19 +637,31 @@ class SentenceSentiment(DictMixin): | |
and 1 for the sentence for all labels. | ||
:vartype confidence_scores: | ||
~azure.ai.textanalytics.SentimentConfidenceScores | ||
:ivar aspects: The list of aspects in this sentence. An aspect is a | ||
key attribute of a product or a service. For example in | ||
"The food at Hotel Foo is good", "food" is an aspect of | ||
"Hotel Foo". This property is only returned if `show_aspects` is | ||
set to True in the call to `analyze_sentiment` | ||
:vartype aspects: | ||
list[~azure.ai.textanalytics.AspectSentiment] | ||
""" | ||
|
||
def __init__(self, **kwargs):
    """Populate the sentence-level fields from keyword arguments.

    Each of ``text``, ``sentiment``, ``confidence_scores`` and ``aspects``
    is optional; a field that is not supplied is stored as None.
    """
    # dict.get already yields None for a missing key, so no explicit
    # default is needed.
    self.text = kwargs.get("text")
    self.sentiment = kwargs.get("sentiment")
    self.confidence_scores = kwargs.get("confidence_scores")
    self.aspects = kwargs.get("aspects")
|
||
@classmethod
def _from_generated(cls, sentence, results):
    """Build an instance of this class from a generated sentence model.

    :param sentence: The generated sentence object. Its ``text``,
        ``sentiment`` and ``confidence_scores`` attributes are read, and
        ``aspects`` is read when the object has it.
    :param results: Passed through unchanged to
        ``AspectSentiment._from_generated`` for every aspect, which uses it
        to look up the opinions an aspect refers to.
    :return: A new instance of ``cls``. ``aspects`` is None when the
        generated sentence has no ``aspects`` attribute or that attribute
        is None; otherwise it is a (possibly empty) list of
        ``AspectSentiment``.
    """
    # A plain hasattr() check is not enough: an object that carries the
    # attribute with a None value would make the comprehension below raise
    # TypeError. Treat "missing" and "None" the same way.
    generated_aspects = getattr(sentence, "aspects", None)
    if generated_aspects is None:
        aspects = None
    else:
        aspects = [
            AspectSentiment._from_generated(aspect, results)  # pylint: disable=protected-access
            for aspect in generated_aspects
        ]
    return cls(
        text=sentence.text,
        sentiment=sentence.sentiment,
        confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores),  # pylint: disable=protected-access
        aspects=aspects
    )
|
||
def __repr__(self): | ||
|
@@ -658,6 +672,126 @@ def __repr__(self): | |
)[:1024] | ||
|
||
|
||
class AspectSentiment(DictMixin): | ||
"""AspectSentiment contains the related opinions, predicted sentiment, | ||
confidence scores and other information about an aspect of a product. | ||
An aspect of a product/service is a key component of that product/service. | ||
For example in "The food at Hotel Foo is good", "food" is an aspect of | ||
"Hotel Foo". | ||
|
||
:ivar str text: The aspect text. | ||
:ivar str sentiment: The predicted Sentiment for the aspect. Possible values | ||
include 'positive', 'mixed', and 'negative'. | ||
:ivar confidence_scores: The sentiment confidence score between 0 | ||
and 1 for the aspect for 'positive' and 'negative' labels. It's score | ||
for 'neutral' will always be 0 | ||
:vartype confidence_scores: | ||
~azure.ai.textanalytics.SentimentConfidenceScores | ||
:ivar opinions: All of the opinions related to this aspect. | ||
:vartype opinions: list[~azure.ai.textanalytics.OpinionSentiment] | ||
:ivar int offset: The aspect offset from the start of the sentence. | ||
iscai-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
:ivar int length: The length of the aspect. | ||
""" | ||
|
||
def __init__(self, **kwargs):
    """Populate the aspect fields from keyword arguments.

    Recognised keys are ``text``, ``sentiment``, ``confidence_scores``,
    ``opinions``, ``offset`` and ``length``. Any key that is not supplied
    is stored as None.
    """
    field_names = (
        "text",
        "sentiment",
        "confidence_scores",
        "opinions",
        "offset",
        "length",
    )
    # Assign in the declared order; a missing key yields None.
    for field_name in field_names:
        setattr(self, field_name, kwargs.get(field_name, None))
|
||
@staticmethod | ||
def _get_opinions(relations, results): | ||
if not relations: | ||
return [] | ||
opinion_relations = [r.ref for r in relations if r.relation_type == "opinion"] | ||
opinions = [] | ||
for opinion_relation in opinion_relations: | ||
nums = [int(s) for s in re.findall(r"\d+", opinion_relation)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is technically incorrect parsing of the JSON pointer (it doesn't take escaping into account). This may or may not be an actual issue (I don't know if any of the keys can have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case, the service will always return something along the lines of |
||
document_index = nums[0] | ||
sentence_index = nums[1] | ||
opinion_index = nums[2] | ||
opinions.append( | ||
results[document_index].sentences[sentence_index].opinions[opinion_index] | ||
) | ||
return opinions | ||
|
||
|
||
@classmethod
def _from_generated(cls, aspect, results):
    """Build an instance of this class from a generated aspect model.

    :param aspect: The generated aspect object. Its ``text``, ``sentiment``,
        ``confidence_scores``, ``relations``, ``offset`` and ``length``
        attributes are read.
    :param results: Handed to ``_get_opinions`` together with
        ``aspect.relations`` to find the generated opinions this aspect
        refers to.
    :return: A new instance of ``cls`` whose ``opinions`` is a list of
        ``OpinionSentiment`` built from those generated opinions.
    """
    # Attributes are read in the same order as the keyword arguments below.
    text = aspect.text
    sentiment = aspect.sentiment
    scores = SentimentConfidenceScores._from_generated(aspect.confidence_scores)  # pylint: disable=protected-access
    generated_opinions = cls._get_opinions(aspect.relations, results)
    opinions = [
        OpinionSentiment._from_generated(generated)  # pylint: disable=protected-access
        for generated in generated_opinions
    ]
    return cls(
        text=text,
        sentiment=sentiment,
        confidence_scores=scores,
        opinions=opinions,
        offset=aspect.offset,
        length=aspect.length
    )
|
||
def __repr__(self):
    """Return a debug representation of the aspect, capped at 1024 characters."""
    template = "AspectSentiment(text={}, sentiment={}, confidence_scores={}, opinions={}, offset={}, length={})"
    rendered = template.format(
        self.text,
        self.sentiment,
        repr(self.confidence_scores),
        repr(self.opinions),
        self.offset,
        self.length
    )
    # Truncate so a long opinion list cannot produce an unbounded string.
    return rendered[:1024]
|
||
|
||
class OpinionSentiment(DictMixin):
    """The predicted sentiment, confidence scores and position of one
    opinion expressed about an aspect.

    For example, in the sentence "The food is good", the opinion of the
    aspect 'food' is 'good'.

    :ivar str text: The opinion text.
    :ivar str sentiment: The predicted Sentiment for the opinion. Possible values
        include 'positive', 'mixed', and 'negative'.
    :ivar confidence_scores: The sentiment confidence score between 0
        and 1 for the opinion for 'positive' and 'negative' labels. Its score
        for 'neutral' will always be 0
    :vartype confidence_scores:
        ~azure.ai.textanalytics.SentimentConfidenceScores
    :ivar int offset: The opinion offset from the start of the sentence.
    :ivar int length: The length of the opinion.
    :ivar bool is_negated: Whether the opinion is negated. For example, in
        "The food is not good", the opinion "good" is negated.
    """

    def __init__(self, **kwargs):
        # Every field is optional; dict.get yields None for a missing key.
        self.text = kwargs.get("text")
        self.sentiment = kwargs.get("sentiment")
        self.confidence_scores = kwargs.get("confidence_scores")
        self.offset = kwargs.get("offset")
        self.length = kwargs.get("length")
        self.is_negated = kwargs.get("is_negated")

    @classmethod
    def _from_generated(cls, opinion):
        """Build an instance of this class from a generated opinion model.

        :param opinion: The generated opinion object. Its ``text``,
            ``sentiment``, ``confidence_scores``, ``offset``, ``length`` and
            ``is_negated`` attributes are read.
        :return: A new instance of ``cls``.
        """
        # The dict literal is evaluated top to bottom, so attributes are
        # read in the same order as the constructor keywords.
        fields = {
            "text": opinion.text,
            "sentiment": opinion.sentiment,
            "confidence_scores": SentimentConfidenceScores._from_generated(opinion.confidence_scores),  # pylint: disable=protected-access
            "offset": opinion.offset,
            "length": opinion.length,
            "is_negated": opinion.is_negated,
        }
        return cls(**fields)

    def __repr__(self):
        """Return a debug representation of the opinion, capped at 1024 characters."""
        template = "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, offset={}, length={}, is_negated={})"
        rendered = template.format(
            self.text,
            self.sentiment,
            repr(self.confidence_scores),
            self.offset,
            self.length,
            self.is_negated
        )
        return rendered[:1024]
|
||
|
||
class SentimentConfidenceScores(DictMixin): | ||
"""The confidence scores (Softmax scores) between 0 and 1. | ||
Higher values indicate higher confidence. | ||
|
@@ -671,15 +805,15 @@ class SentimentConfidenceScores(DictMixin): | |
""" | ||
|
||
def __init__(self, **kwargs):
    """Populate the three confidence scores from keyword arguments.

    ``positive``, ``neutral`` and ``negative`` are each optional; a score
    that is not supplied is stored as 0.0.
    """
    for label in ('positive', 'neutral', 'negative'):
        setattr(self, label, kwargs.get(label, 0.0))
|
||
@classmethod
def _from_generated(cls, score):
    """Build an instance of this class from a generated confidence-score model.

    :param score: The generated score object. ``positive`` and ``negative``
        are read directly; ``neutral`` is read only if the object has it.
    :return: A new instance of ``cls``. ``neutral`` is 0.0 when the
        generated object has no ``neutral`` attribute.
    """
    return cls(
        positive=score.positive,
        # The earlier check looked for a misspelled attribute name
        # ("netural"), so it never matched and every real neutral score
        # was replaced by 0.0. Look up the correctly spelled attribute and
        # fall back to 0.0 only when it is truly absent.
        neutral=getattr(score, "neutral", 0.0),
        negative=score.negative
    )
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -378,6 +378,12 @@ def analyze_sentiment( # type: ignore | |
:type documents: | ||
list[str] or list[~azure.ai.textanalytics.TextDocumentInput] or | ||
list[dict[str, str]] | ||
:keyword bool show_aspects: Whether to conduct more granular analysis around the aspects of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should add a .. versionadded (or possibly a .. note) directive that indicates in which API version(s) this parameter is available. We should see what the generated docs look like for each since our docs pipelines are customized and I don't think we've used either up until now... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
a product or service (also known as aspect-based sentiment analysis). For example, | ||
in the review "The food at Hotel Foo is good", "food" is an aspect of "Hotel Foo", and | ||
setting `show_aspects` to True will go into the sentiment and opinions of "food". | ||
If set to true, the returned :class:`~azure.ai.textanalytics.SentenceSentiment` objects | ||
will have property `aspects` containing the result of this analysis. | ||
:keyword str language: The 2 letter ISO 639-1 representation of language for the | ||
entire batch. For example, use "en" for English; "es" for Spanish etc. | ||
If not set, uses "en" for English as default. Per-document language will | ||
|
@@ -408,6 +414,15 @@ def analyze_sentiment( # type: ignore | |
docs = _validate_batch_input(documents, "language", language) | ||
model_version = kwargs.pop("model_version", None) | ||
show_stats = kwargs.pop("show_stats", False) | ||
show_aspects = kwargs.pop("show_aspects", None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should default to False There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we need to distinguish between "the application explicitly passed in this value" vs. "the application didn't provide a value, so we'll pick an appropriate default for them" for positional arguments, then we use a sentinel value as the default value. This is often Since we are dealing with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @johanste I do need to still pop kwargs in this case since the name of the parameter has changed (we have it as |
||
|
||
if show_aspects is not None: | ||
if self._api_version == "v3.0": | ||
iscai-msft marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise TypeError( | ||
"Parameter 'show_aspects' is only added for API version v3.1-preview.1 and up" | ||
) | ||
if self._api_version == "v3.1-preview.1": | ||
kwargs.update({"opinion_mining": show_aspects}) | ||
try: | ||
return self._client.sentiment( | ||
documents=docs, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be better to call these SentimentAspect and SentimentOpinion, as this represents the "Aspect of the Sentiment" and not the "Aspect's sentiment". I feel with AspectSentiment it suggests the latter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have DocumentSentiment, SentenceSentiment, so if we follow the pattern. It would be AspectSentiment and OpinionSentiment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, after talking to @annelo-msft, she brought up a really good point: these all have the same basic structure (they have confidence_scores, sentiment, etc.), and it's more important for a user to have a logical pattern than for the individual names to be the best English.