Skip to content

Commit

Permalink
add: Document Translation snippets - translate_document and batch_tra…
Browse files Browse the repository at this point in the history
…nslate_document (#183)

* add translate document snippet and test

Change-Id: I44375a48211e4e25ebbd239e8d5cac84b5ce5eaf

* remove gcs_source

Change-Id: Iacf445ef4b24eb5abc8bad776a3217622c14b063

* language code

Change-Id: I2bab14b86f60520dacb60fba741d64372f5090ac

* never mind

Change-Id: I4aacc10e31154672272c22c4ab780e10c82a517e

* add batch translate document snippet and test

Change-Id: I858d4b05c7efd1f7dac2c409eee841208663b22d

* increase timeout

Change-Id: Ic7b4bee9c74721fffcc05dfe1dacb5fe5bb14c13

* increase timeout

Change-Id: I62d71a5a4b444d73d658afa243173814f3cbe900

* increase timeout

Change-Id: I8d15d94473471e3028f08699205877c5d972522d

* update year

Change-Id: Ide83e4507e140d7a6e92ed03b0d864ef4b3f2721

* remove unnecessary comments

Change-Id: Id778e7a0fff787f8b3e1eb63a7bf3217c0145524

* fix formatting

Change-Id: I45d0b1519b975a22f8166acf0d60e6ff57699cd2

* remove whitespace

Change-Id: I597a64c5ad9a99abc1c94c609cd87ec1867ba8a8
  • Loading branch information
aribray authored Jun 29, 2021
1 parent e6b720f commit 1312eb7
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 0 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START translate_v3beta1_batch_translate_document]

from google.cloud import translate_v3beta1 as translate


def batch_translate_document(
    input_uri: str,
    output_uri: str,
    project_id: str,
    timeout=180,
):
    """Batch-translate documents stored in Cloud Storage from en-US to fr-FR.

    Starts a batch document translation operation, blocks until it
    completes, and prints the total page count reported in the response.

    Args:
        input_uri: Google Cloud Storage location of the source input. This
            can be a single file (for example,
            ``gs://translation-test/input.docx``) or a wildcard (for
            example, ``gs://translation-test/*``). Supported file types:
            https://cloud.google.com/translate/docs/supported-formats
        output_uri: Cloud Storage URI prefix, sent as ``output_uri_prefix``
            of the ``gcs_destination``.
        project_id: Google Cloud project ID used to build the ``parent``
            resource name.
        timeout: Value passed to ``operation.result`` while waiting for the
            long-running operation (presumably seconds; confirm against
            google-api-core).
    """
    service = translate.TranslationServiceClient()

    # The ``global`` location is not supported for batch translation
    region = "us-central1"

    request = {
        "parent": f"projects/{project_id}/locations/{region}",
        # Supported language codes: https://cloud.google.com/translate/docs/language
        "source_language_code": "en-US",
        "target_language_codes": ["fr-FR"],
        "input_configs": [{"gcs_source": {"input_uri": input_uri}}],
        "output_config": {"gcs_destination": {"output_uri_prefix": output_uri}},
    }
    pending = service.batch_translate_document(request=request)

    print("Waiting for operation to complete...")
    result = pending.result(timeout)

    print(f"Total Pages: {result.total_pages}")


# [END translate_v3beta1_batch_translate_document]
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import uuid

from google.cloud import storage
import pytest


import translate_v3beta1_batch_translate_document


# Project ID handed to the sample under test. Read at import time, so this
# module fails with KeyError when GOOGLE_CLOUD_PROJECT is not set.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]


@pytest.fixture(scope="function")
def bucket():
    """Provide a uniquely named temporary bucket for translation output.

    The bucket is created in ``us-central1`` before the test runs and is
    deleted once the test is done with it.
    """
    unique_name = f"test-{uuid.uuid4()}"
    gcs = storage.Client()
    temp_bucket = gcs.create_bucket(gcs.bucket(unique_name), location="us-central1")

    yield temp_bucket

    # force=True so the bucket can be removed even though the test wrote
    # output objects into it.
    temp_bucket.delete(force=True)


@pytest.mark.flaky(max_runs=3, min_passes=1)
def test_batch_translate_document(capsys, bucket):
    """Run the batch sample against the temporary bucket and check its output."""
    destination = f"gs://{bucket.name}/translation/BATCH_TRANSLATE_DOCUMENT_OUTPUT/"

    translate_v3beta1_batch_translate_document.batch_translate_document(
        input_uri="gs://cloud-samples-data/translation/async_invoices/*",
        output_uri=destination,
        project_id=PROJECT_ID,
        timeout=1000,
    )

    # The sample reports its result on stdout only.
    captured = capsys.readouterr()
    assert "Total Pages" in captured.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START translate_v3beta1_translate_document]
from google.cloud import translate_v3beta1 as translate


def translate_document(
    project_id: str,
    file_path: str,
    mime_type: str = "application/pdf",
):
    """Translate a local document to fr-FR and print the detected source language.

    The file is read in binary mode and sent inline in the request.

    Args:
        project_id: Google Cloud project ID used to build the ``parent``
            resource name.
        file_path: Path to the local document to translate. Supported file
            types: https://cloud.google.com/translate/docs/supported-formats
        mime_type: MIME type declared for the document content. Defaults to
            ``"application/pdf"``; pass the matching type when ``file_path``
            points at another supported format, since inline content carries
            no file extension for the service to inspect.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"

    parent = f"projects/{project_id}/locations/{location}"

    with open(file_path, "rb") as document:
        document_content = document.read()

    document_input_config = {
        "content": document_content,
        "mime_type": mime_type,
    }

    response = client.translate_document(
        request={
            "parent": parent,
            "target_language_code": "fr-FR",
            "document_input_config": document_input_config,
        }
    )

    # To view the translated document, write
    # `response.document_translation.byte_stream_outputs` to a file.
    # No output config is provided in this request, so the translated file is
    # only returned through a byte stream and its output mime type is the same
    # as the input file's mime type.
    print(
        "Response: Detected Language Code - {}".format(
            response.document_translation.detected_language_code
        )
    )


# [END translate_v3beta1_translate_document]
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os

import translate_v3beta1_translate_document

# Project ID handed to the sample under test. Read at import time, so this
# module fails with KeyError when GOOGLE_CLOUD_PROJECT is not set.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Relative path of the PDF passed to the sample as ``file_path``.
FILE_PATH = "resources/fake_invoice.pdf"


def test_translate_document(capsys):
    """Run the single-document sample and check that it printed a response."""
    translate_v3beta1_translate_document.translate_document(
        project_id=PROJECT_ID,
        file_path=FILE_PATH,
    )

    captured = capsys.readouterr()
    assert "Response" in captured.out

0 comments on commit 1312eb7

Please sign in to comment.