 Google Cloud API.

 Usage Examples:
-    python beta_snippets.py \
-        transcription gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py transcription \
+        gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py video-text-gcs \
+        gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py track-objects /resources/cat.mp4
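+    python beta_snippets.py video-text /resources/cat.mp4
+    python beta_snippets.py track-objects-gcs \
+        gs://python-docs-samples-tests/video/googlework_short.mp4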
2326"""
2427
2528import argparse
29+ import io
2630
27- from google .cloud import videointelligence_v1p1beta1 as videointelligence
2831
29-
30- # [START video_speech_transcription_gcs_beta]
3132def speech_transcription (input_uri ):
33+ # [START video_speech_transcription_gcs_beta]
3234 """Transcribe speech from a video stored on GCS."""
+    from google.cloud import videointelligence_v1p1beta1 as videointelligence
+
     video_client = videointelligence.VideoIntelligenceServiceClient()

     features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
                 start_time.seconds + start_time.nanos * 1e-9,
                 end_time.seconds + end_time.nanos * 1e-9,
                 word))
-# [END video_speech_transcription_gcs_beta]
+    # [END video_speech_transcription_gcs_beta]
+
+
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs_beta]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
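+    # annotate_video returns a long-running operation; result() blocks until
+    # it completes or the timeout (in seconds) elapses.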
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
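+    # The offsets are protobuf Duration values; combining the seconds and
+    # nanos fields yields fractional seconds.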
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs_beta]
+    return annotation_result.text_annotations
+
+
+def video_detect_text(path):
+    # [START video_detect_text_beta]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
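+    # A local video is passed inline as bytes (input_content) instead of
+    # being referenced by a GCS URI.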
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_beta]
+    return annotation_result.text_annotations
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs_beta]
+    """Track objects in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    # Using location_id 'us-east1' is recommended for the lowest latency,
+    # because this region uses different types of processors than the others.
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
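+    # Each ObjectTrackingAnnotation carries the tracked entity, its time
+    # segment, a confidence score, and per-frame bounding boxes.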
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
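+    # The box coordinates are normalized to [0, 1] relative to the frame size.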
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_gcs_beta]
+    return object_annotations
+
+
+def track_objects(path):
+    # [START video_object_tracking_beta]
+    """Track objects in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    # Using location_id 'us-east1' is recommended for the lowest latency,
+    # because this region uses different types of processors than the others.
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_beta]
+    return object_annotations


 if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
         'transcription', help=speech_transcription.__doc__)
     speech_transcription_parser.add_argument('gcs_uri')

+    video_text_gcs_parser = subparsers.add_parser(
+        'video-text-gcs', help=video_detect_text_gcs.__doc__)
+    video_text_gcs_parser.add_argument('gcs_uri')
+
+    video_text_parser = subparsers.add_parser(
+        'video-text', help=video_detect_text.__doc__)
+    video_text_parser.add_argument('path')
+
+    video_object_tracking_gcs_parser = subparsers.add_parser(
+        'track-objects-gcs', help=track_objects_gcs.__doc__)
+    video_object_tracking_gcs_parser.add_argument('gcs_uri')
+
+    video_object_tracking_parser = subparsers.add_parser(
+        'track-objects', help=track_objects.__doc__)
+    video_object_tracking_parser.add_argument('path')
+
     args = parser.parse_args()

     if args.command == 'transcription':
         speech_transcription(args.gcs_uri)
+    elif args.command == 'video-text-gcs':
+        video_detect_text_gcs(args.gcs_uri)
+    elif args.command == 'video-text':
+        video_detect_text(args.path)
+    elif args.command == 'track-objects-gcs':
+        track_objects_gcs(args.gcs_uri)
+    elif args.command == 'track-objects':
+        track_objects(args.path)