Skip to content
This repository has been archived by the owner on Sep 5, 2023. It is now read-only.

Commit

Permalink
Adds test for encoded characters. [(#961)](GoogleCloudPlatform/python…
Browse files Browse the repository at this point in the history
  • Loading branch information
gguuss authored and busunkim96 committed Sep 29, 2020
1 parent 8558231 commit 6d00f79
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
19 changes: 14 additions & 5 deletions samples/snippets/cloud-client/v1beta2/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"""

import argparse
import sys

from google.cloud import language
from google.cloud.gapic.language.v1beta2 import enums
Expand Down Expand Up @@ -53,7 +54,7 @@ def sentiment_file(gcs_uri):
language_client = language.Client(api_version='v1beta2')

# Instantiates a plain text document.
document = language_client.document_from_url(gcs_uri)
document = language_client.document_from_gcs_url(gcs_uri)

# Detects sentiment in the document. You can also analyze HTML with:
# document.doc_type = language.Document.HTML
Expand Down Expand Up @@ -92,7 +93,7 @@ def entities_file(gcs_uri):
language_client = language.Client(api_version='v1beta2')

# Instantiates a plain text document.
document = language_client.document_from_url(gcs_uri)
document = language_client.document_from_gcs_url(gcs_uri)

# Detects entities in the document. You can also analyze HTML with:
# document.doc_type = language.Document.HTML
Expand Down Expand Up @@ -131,7 +132,7 @@ def syntax_file(gcs_uri):
language_client = language.Client(api_version='v1beta2')

# Instantiates a plain text document.
document = language_client.document_from_url(gcs_uri)
document = language_client.document_from_gcs_url(gcs_uri)

# Detects syntax in the document. You can also analyze HTML with:
# document.doc_type = language.Document.HTML
Expand All @@ -152,8 +153,12 @@ def entity_sentiment_text(text):
document.content = text.encode('utf-8')
document.type = enums.Document.Type.PLAIN_TEXT

encoding = enums.EncodingType.UTF32
if sys.maxunicode == 65535:
encoding = enums.EncodingType.UTF16

result = language_client.analyze_entity_sentiment(
document, enums.EncodingType.UTF8)
document, encoding)

for entity in result.entities:
print('Mentions: ')
Expand All @@ -176,8 +181,12 @@ def entity_sentiment_file(gcs_uri):
document.gcs_content_uri = gcs_uri
document.type = enums.Document.Type.PLAIN_TEXT

encoding = enums.EncodingType.UTF32
if sys.maxunicode == 65535:
encoding = enums.EncodingType.UTF16

result = language_client.analyze_entity_sentiment(
document, enums.EncodingType.UTF8)
document, encoding)

for entity in result.entities:
print(u'Name: "{}"'.format(entity.name))
Expand Down
7 changes: 7 additions & 0 deletions samples/snippets/cloud-client/v1beta2/snippets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,10 @@ def test_sentiment_entities_file(capsys):
snippets.entity_sentiment_file(TEST_FILE_URL)
out, _ = capsys.readouterr()
assert 'Content : White House' in out


def test_sentiment_entities_utf(capsys):
    """Check entity-sentiment offsets for text containing a non-ASCII character.

    Runs ``snippets.entity_sentiment_text`` on a string with a right-arrow
    between two words and asserts that the captured stdout reports a begin
    offset of 4.
    """
    # NOTE(review): 4 is presumably the offset of "bar" counted in characters
    # ("foo" + the arrow), as opposed to a UTF-8 byte offset -- confirm
    # against the API's encoding-type semantics.
    text_with_arrow = 'foo→bar'
    snippets.entity_sentiment_text(text_with_arrow)

    captured_stdout, _ = capsys.readouterr()
    expected_fragment = 'Begin Offset : 4'
    assert expected_fragment in captured_stdout

0 comments on commit 6d00f79

Please sign in to comment.