GoogleCloudPlatform · nnegrey · Mar 3, 2020 · Mar 3, 2020
diff --git a/vision/cloud-client/resources/kafka.pdf b/vision/cloud-client/resources/kafka.pdf
diff --git a/vision/cloud-client/src/main/java/com/example/vision/BatchAnnotateFiles.java b/vision/cloud-client/src/main/java/com/example/vision/BatchAnnotateFiles.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.vision;
+
+// [START vision_batch_annotate_files]
+import com.google.cloud.vision.v1.AnnotateFileRequest;
+import com.google.cloud.vision.v1.AnnotateImageResponse;
+import com.google.cloud.vision.v1.BatchAnnotateFilesRequest;
+import com.google.cloud.vision.v1.BatchAnnotateFilesResponse;
+import com.google.cloud.vision.v1.Block;
+import com.google.cloud.vision.v1.Feature;
+import com.google.cloud.vision.v1.ImageAnnotatorClient;
+import com.google.cloud.vision.v1.InputConfig;
+import com.google.cloud.vision.v1.Page;
+import com.google.cloud.vision.v1.Paragraph;
+import com.google.cloud.vision.v1.Symbol;
+import com.google.cloud.vision.v1.Word;
+import com.google.protobuf.ByteString;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+public class BatchAnnotateFiles {
+
+  public static void batchAnnotateFiles() throws IOException {
+    String filePath = "path/to/your/file.pdf";
+    batchAnnotateFiles(filePath);
+  }
+
+  public static void batchAnnotateFiles(String filePath) throws IOException {
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests. After completing all of your requests, call
+    // the "close" method on the client to safely clean up any remaining background resources.
+    try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create()) {
+      // You can send multiple files to be annotated, this sample demonstrates how to do this with
+      // one file. If you want to use multiple files, you have to create a `AnnotateImageRequest`
+      // object for each file that you want annotated.
+      // First read the files contents
+      Path path = Paths.get(filePath);
+      byte[] data = Files.readAllBytes(path);
+      ByteString content = ByteString.copyFrom(data);
+
+      // Specify the input config with the file's contents and its type.
+      // Supported mime_type: application/pdf, image/tiff, image/gif
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#inputconfig
+      InputConfig inputConfig =
+          InputConfig.newBuilder().setMimeType("application/pdf").setContent(content).build();
+
+      // Set the type of annotation you want to perform on the file
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.Feature.Type
+      Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
+
+      // Build the request object for that one file. Note: for additional file you have to create
+      // additional `AnnotateFileRequest` objects and store them in a list to be used below.
+      // Since we are sending a file of type `application/pdf`, we can use the `pages` field to
+      // specify which pages to process. The service can process up to 5 pages per document file.
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.AnnotateFileRequest
+      AnnotateFileRequest fileRequest =
+          AnnotateFileRequest.newBuilder()
+              .setInputConfig(inputConfig)
+              .addFeatures(feature)
+              .addPages(1) // Process the first page
+              .addPages(2) // Process the second page
+              .addPages(-1) // Process the last page
+              .build();
+
+      // Add each `AnnotateFileRequest` object to the batch request.
+      BatchAnnotateFilesRequest request =
+          BatchAnnotateFilesRequest.newBuilder().addRequests(fileRequest).build();
+
+      // Make the synchronous batch request.
+      BatchAnnotateFilesResponse response = imageAnnotatorClient.batchAnnotateFiles(request);
+
+      // Process the results, just get the first result, since only one file was sent in this
+      // sample.
+      for (AnnotateImageResponse imageResponse :
+          response.getResponsesList().get(0).getResponsesList()) {
+        System.out.printf("Full text: %s\n", imageResponse.getFullTextAnnotation().getText());
+        for (Page page : imageResponse.getFullTextAnnotation().getPagesList()) {
+          for (Block block : page.getBlocksList()) {
+            System.out.printf("\nBlock confidence: %s\n", block.getConfidence());
+            for (Paragraph par : block.getParagraphsList()) {
+              System.out.printf("\tParagraph confidence: %s\n", par.getConfidence());
+              for (Word word : par.getWordsList()) {
+                System.out.printf("\t\tWord confidence: %s\n", word.getConfidence());
+                for (Symbol symbol : word.getSymbolsList()) {
+                  System.out.printf(
+                      "\t\t\tSymbol: %s, (confidence: %s)\n",
+                      symbol.getText(), symbol.getConfidence());
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+// [END vision_batch_annotate_files]
diff --git a/vision/cloud-client/src/main/java/com/example/vision/BatchAnnotateFilesGcs.java b/vision/cloud-client/src/main/java/com/example/vision/BatchAnnotateFilesGcs.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.vision;
+
+// [START vision_batch_annotate_files]
+import com.google.cloud.vision.v1.AnnotateFileRequest;
+import com.google.cloud.vision.v1.AnnotateImageResponse;
+import com.google.cloud.vision.v1.BatchAnnotateFilesRequest;
+import com.google.cloud.vision.v1.BatchAnnotateFilesResponse;
+import com.google.cloud.vision.v1.Block;
+import com.google.cloud.vision.v1.Feature;
+import com.google.cloud.vision.v1.GcsSource;
+import com.google.cloud.vision.v1.ImageAnnotatorClient;
+import com.google.cloud.vision.v1.InputConfig;
+import com.google.cloud.vision.v1.Page;
+import com.google.cloud.vision.v1.Paragraph;
+import com.google.cloud.vision.v1.Symbol;
+import com.google.cloud.vision.v1.Word;
+
+import java.io.IOException;
+
+public class BatchAnnotateFilesGcs {
+
+  public static void batchAnnotateFilesGcs() throws IOException {
+    String gcsUri = "gs://cloud-samples-data/vision/document_understanding/kafka.pdf";
+    batchAnnotateFilesGcs(gcsUri);
+  }
+
+  public static void batchAnnotateFilesGcs(String gcsUri) throws IOException {
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests. After completing all of your requests, call
+    // the "close" method on the client to safely clean up any remaining background resources.
+    try (ImageAnnotatorClient imageAnnotatorClient = ImageAnnotatorClient.create()) {
+      // You can send multiple files to be annotated, this sample demonstrates how to do this with
+      // one file. If you want to use multiple files, you have to create a `AnnotateImageRequest`
+      // object for each file that you want annotated.
+      // First specify where the vision api can find the image
+      GcsSource gcsSource = GcsSource.newBuilder().setUri(gcsUri).build();
+
+      // Specify the input config with the file's uri and its type.
+      // Supported mime_type: application/pdf, image/tiff, image/gif
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#inputconfig
+      InputConfig inputConfig =
+          InputConfig.newBuilder().setMimeType("application/pdf").setGcsSource(gcsSource).build();
+
+      // Set the type of annotation you want to perform on the file
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.Feature.Type
+      Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
+
+      // Build the request object for that one file. Note: for additional file you have to create
+      // additional `AnnotateFileRequest` objects and store them in a list to be used below.
+      // Since we are sending a file of type `application/pdf`, we can use the `pages` field to
+      // specify which pages to process. The service can process up to 5 pages per document file.
+      // https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.AnnotateFileRequest
+      AnnotateFileRequest fileRequest =
+          AnnotateFileRequest.newBuilder()
+              .setInputConfig(inputConfig)
+              .addFeatures(feature)
+              .addPages(1) // Process the first page
+              .addPages(2) // Process the second page
+              .addPages(-1) // Process the last page
+              .build();
+
+      // Add each `AnnotateFileRequest` object to the batch request.
+      BatchAnnotateFilesRequest request =
+          BatchAnnotateFilesRequest.newBuilder().addRequests(fileRequest).build();
+
+      // Make the synchronous batch request.
+      BatchAnnotateFilesResponse response = imageAnnotatorClient.batchAnnotateFiles(request);
+
+      // Process the results, just get the first result, since only one file was sent in this
+      // sample.
+      for (AnnotateImageResponse imageResponse :
+          response.getResponsesList().get(0).getResponsesList()) {
+        System.out.printf("Full text: %s\n", imageResponse.getFullTextAnnotation().getText());
+        for (Page page : imageResponse.getFullTextAnnotation().getPagesList()) {
+          for (Block block : page.getBlocksList()) {
+            System.out.printf("\nBlock confidence: %s\n", block.getConfidence());
+            for (Paragraph par : block.getParagraphsList()) {
+              System.out.printf("\tParagraph confidence: %s\n", par.getConfidence());
+              for (Word word : par.getWordsList()) {
+                System.out.printf("\t\tWord confidence: %s\n", word.getConfidence());
+                for (Symbol symbol : word.getSymbolsList()) {
+                  System.out.printf(
+                      "\t\t\tSymbol: %s, (confidence: %s)\n",
+                      symbol.getText(), symbol.getConfidence());
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+// [END vision_batch_annotate_files]
diff --git a/vision/cloud-client/src/test/java/com/example/vision/BatchAnnotateFilesGcsTest.java b/vision/cloud-client/src/test/java/com/example/vision/BatchAnnotateFilesGcsTest.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.vision;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Runs the {@link BatchAnnotateFilesGcs} sample and checks what it prints to standard output. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class BatchAnnotateFilesGcsTest {
+  private ByteArrayOutputStream bout;
+  private PrintStream originalOut;
+
+  @Before
+  public void setUp() {
+    originalOut = System.out;
+    bout = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(bout));
+  }
+
+  @After
+  public void tearDown() {
+    System.setOut(originalOut); // Restore the real stream; null would break later printing.
+  }
+
+  @Test
+  public void testSetEndpoint() throws IOException {
+    BatchAnnotateFilesGcs.batchAnnotateFilesGcs(
+        "gs://cloud-samples-data/vision/document_understanding/kafka.pdf");
+
+    assertThat(bout.toString()).contains("Word confidence");
+  }
+}
diff --git a/vision/cloud-client/src/test/java/com/example/vision/BatchAnnotateFilesTest.java b/vision/cloud-client/src/test/java/com/example/vision/BatchAnnotateFilesTest.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.vision;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Runs the {@link BatchAnnotateFiles} sample and checks what it prints to standard output. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class BatchAnnotateFilesTest {
+  private ByteArrayOutputStream bout;
+  private PrintStream originalOut;
+
+  @Before
+  public void setUp() {
+    originalOut = System.out;
+    bout = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(bout));
+  }
+
+  @After
+  public void tearDown() {
+    System.setOut(originalOut); // Restore the real stream; null would break later printing.
+  }
+
+  @Test
+  public void testSetEndpoint() throws IOException {
+    BatchAnnotateFiles.batchAnnotateFiles("resources/kafka.pdf");
+
+    assertThat(bout.toString()).contains("Word confidence");
+  }
+}