Merge pull request #2236 from daspecster/vision-landmark-detection

Add vision landmark detection
googleapis · Sep 12, 2016 · 71228f7 · 71228f7
2 parents 15e8aec + 419cb9c
commit 71228f7
Show file tree

Hide file tree

Showing 5 changed files with 217 additions and 10 deletions.
diff --git a/google/cloud/vision/entity.py b/google/cloud/vision/entity.py
@@ -16,6 +16,7 @@
 
 
 from google.cloud.vision.geometry import Bounds
+from google.cloud.vision.geometry import LocationInformation
 
 
 class EntityAnnotation(object):
@@ -27,15 +28,20 @@ class EntityAnnotation(object):
     :type description: str
     :param description: Description of entity detected in an image.
 
+    :type locations: list of
+                     :class:`~google.cloud.vision.geometry.LocationInformation`.
+    :param locations: List of ``LocationInformation`` instances.
+
     :type mid: str
     :param mid: Opaque entity ID.
 
     :type score: float
     :param score: Overall score of the result. Range [0, 1].
     """
-    def __init__(self, bounds, description, mid, score):
+    def __init__(self, bounds, description, locations, mid, score):
         self._bounds = bounds
         self._description = description
+        self._locations = locations
         self._mid = mid
         self._score = score
 
@@ -51,10 +57,12 @@ def from_api_repr(cls, response):
         """
         bounds = Bounds.from_api_repr(response['boundingPoly'])
         description = response['description']
+        locations = [LocationInformation.from_api_repr(location)
+                     for location in response.get('locations', [])]
         mid = response['mid']
         score = response['score']
 
-        return cls(bounds, description, mid, score)
+        return cls(bounds, description, locations, mid, score)
 
     @property
     def bounds(self):
@@ -74,6 +82,16 @@ def description(self):
         """
         return self._description
 
+    @property
+    def locations(self):
+        """Location coordinates of the landmarks detected.
+
+        :rtype: list
+        :returns: List of ``LocationInformation`` populated with latitude
+                  and longitude of the objects detected in an image.
+        """
+        return self._locations
+
     @property
     def mid(self):
         """MID of feature detected in image.

diff --git a/google/cloud/vision/geometry.py b/google/cloud/vision/geometry.py
@@ -58,6 +58,53 @@ class FDBounds(BoundsBase):
     """The bounding polygon of just the skin portion of the face."""
 
 
+class LocationInformation(object):
+    """Representation of location information returned by the Vision API.
+
+    :type latitude: float
+    :param latitude: Latitude coordinate of geographical location.
+
+    :type longitude: float
+    :param longitude: Longitude coordinate of geographical location.
+    """
+    def __init__(self, latitude, longitude):
+        self._latitude = latitude
+        self._longitude = longitude
+
+    @classmethod
+    def from_api_repr(cls, response):
+        """Factory: construct location information from Vision API response.
+
+        :type response: dict
+        :param response: Dictionary response of locations.
+
+        :rtype: :class:`~google.cloud.vision.geometry.LocationInformation`
+        :returns: ``LocationInformation`` with populated latitude and
+                  longitude.
+        """
+        latitude = response['latLng']['latitude']
+        longitude = response['latLng']['longitude']
+        return cls(latitude, longitude)
+
+    @property
+    def latitude(self):
+        """Latitude coordinate.
+
+        :rtype: float
+        :returns: Latitude coordinate of location.
+        """
+        return self._latitude
+
+    @property
+    def longitude(self):
+        """Longitude coordinate.
+
+        :rtype: float
+        :returns: Longitude coordinate of location.
+        """
+        return self._longitude
+
+
 class Position(object):
     """A 3D position in the image.
 

diff --git a/google/cloud/vision/image.py b/google/cloud/vision/image.py
@@ -82,6 +82,28 @@ def source(self):
         """
         return self._source
 
+    def _detect_annotation(self, feature):
+        """Generic method for detecting a single annotation.
+
+        :type feature: :class:`~google.cloud.vision.feature.Feature`
+        :param feature: The ``Feature`` indicating the type of annotation to
+                        perform.
+
+        :rtype: list
+        :returns: List of
+                  :class:`~google.cloud.vision.entity.EntityAnnotation`.
+        """
+        reverse_types = {
+            'LANDMARK_DETECTION': 'landmarkAnnotations',
+            'LOGO_DETECTION': 'logoAnnotations',
+        }
+        detected_objects = []
+        result = self.client.annotate(self, [feature])
+        for response in result[reverse_types[feature.feature_type]]:
+            detected_object = EntityAnnotation.from_api_repr(response)
+            detected_objects.append(detected_object)
+        return detected_objects
+
     def detect_faces(self, limit=10):
         """Detect faces in image.
 
@@ -100,6 +122,19 @@ def detect_faces(self, limit=10):
 
         return faces
 
+    def detect_landmarks(self, limit=10):
+        """Detect landmarks in an image.
+
+        :type limit: int
+        :param limit: The maximum number of landmarks to find.
+
+        :rtype: list
+        :returns: List of
+                  :class:`~google.cloud.vision.entity.EntityAnnotation`.
+        """
+        feature = Feature(FeatureTypes.LANDMARK_DETECTION, limit)
+        return self._detect_annotation(feature)
+
     def detect_logos(self, limit=10):
         """Detect logos in an image.
 
@@ -110,11 +145,5 @@ def detect_logos(self, limit=10):
         :returns: List of
                   :class:`~google.cloud.vision.entity.EntityAnnotation`.
         """
-        logos = []
-        logo_detection_feature = Feature(FeatureTypes.LOGO_DETECTION, limit)
-        result = self.client.annotate(self, [logo_detection_feature])
-        for logo_response in result['logoAnnotations']:
-            logo = EntityAnnotation.from_api_repr(logo_response)
-            logos.append(logo)
-
-        return logos
+        feature = Feature(FeatureTypes.LOGO_DETECTION, limit)
+        return self._detect_annotation(feature)
diff --git a/unit_tests/vision/_fixtures.py b/unit_tests/vision/_fixtures.py
@@ -1,3 +1,78 @@
+LANDMARK_DETECTION_RESPONSE = {
+    'responses': [
+        {
+            'landmarkAnnotations': [
+                {
+                    'mid': '/m/04gdr',
+                    'description': 'Louvre',
+                    'score': 0.67257267,
+                    'boundingPoly': {
+                        'vertices': [
+                            {
+                                'x': 1075,
+                                'y': 49
+                            },
+                            {
+                                'x': 1494,
+                                'y': 49
+                            },
+                            {
+                                'x': 1494,
+                                'y': 307
+                            },
+                            {
+                                'x': 1075,
+                                'y': 307
+                            }
+                        ]
+                    },
+                    'locations': [
+                        {
+                            'latLng': {
+                                'latitude': 48.861013,
+                                'longitude': 2.335818
+                            }
+                        }
+                    ]
+                },
+                {
+                    'mid': '/m/094llg',
+                    'description': 'Louvre Pyramid',
+                    'score': 0.53734678,
+                    'boundingPoly': {
+                        'vertices': [
+                            {
+                                'x': 227,
+                                'y': 274
+                            },
+                            {
+                                'x': 1471,
+                                'y': 274
+                            },
+                            {
+                                'x': 1471,
+                                'y': 624
+                            },
+                            {
+                                'x': 227,
+                                'y': 624
+                            }
+                        ]
+                    },
+                    'locations': [
+                        {
+                            'latLng': {
+                                'latitude': 48.860749,
+                                'longitude': 2.336312
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
+
 LOGO_DETECTION_RESPONSE = {
     'responses': [
         {

diff --git a/unit_tests/vision/test_client.py b/unit_tests/vision/test_client.py
@@ -114,6 +114,44 @@ def test_face_detection_from_content(self):
                          image_request['image']['content'])
         self.assertEqual(5, image_request['features'][0]['maxResults'])
 
+    def test_landmark_detection_from_source(self):
+        from google.cloud.vision.entity import EntityAnnotation
+        from unit_tests.vision._fixtures import (LANDMARK_DETECTION_RESPONSE as
+                                                 RETURNED)
+        credentials = _Credentials()
+        client = self._makeOne(project=self.PROJECT, credentials=credentials)
+        client.connection = _Connection(RETURNED)
+
+        image = client.image(source_uri=_IMAGE_SOURCE)
+        landmarks = image.detect_landmarks(limit=3)
+        self.assertEqual(2, len(landmarks))
+        self.assertTrue(isinstance(landmarks[0], EntityAnnotation))
+        image_request = client.connection._requested[0]['data']['requests'][0]
+        self.assertEqual(_IMAGE_SOURCE,
+                         image_request['image']['source']['gcs_image_uri'])
+        self.assertEqual(3, image_request['features'][0]['maxResults'])
+        self.assertEqual(48.861013, landmarks[0].locations[0].latitude)
+        self.assertEqual(2.335818, landmarks[0].locations[0].longitude)
+        self.assertEqual('/m/04gdr', landmarks[0].mid)
+        self.assertEqual('/m/094llg', landmarks[1].mid)
+
+    def test_landmark_detection_from_content(self):
+        from google.cloud.vision.entity import EntityAnnotation
+        from unit_tests.vision._fixtures import (LANDMARK_DETECTION_RESPONSE as
+                                                 RETURNED)
+        credentials = _Credentials()
+        client = self._makeOne(project=self.PROJECT, credentials=credentials)
+        client.connection = _Connection(RETURNED)
+
+        image = client.image(content=_IMAGE_CONTENT)
+        landmarks = image.detect_landmarks(limit=5)
+        self.assertEqual(2, len(landmarks))
+        self.assertTrue(isinstance(landmarks[0], EntityAnnotation))
+        image_request = client.connection._requested[0]['data']['requests'][0]
+        self.assertEqual(self.B64_IMAGE_CONTENT,
+                         image_request['image']['content'])
+        self.assertEqual(5, image_request['features'][0]['maxResults'])
+
     def test_logo_detection_from_source(self):
         from google.cloud.vision.entity import EntityAnnotation
         from unit_tests.vision._fixtures import LOGO_DETECTION_RESPONSE