-
Notifications
You must be signed in to change notification settings - Fork 0
/
find-text-and-faces.py
148 lines (103 loc) · 5.01 KB
/
find-text-and-faces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import zipfile
from PIL import Image
import pytesseract
import cv2 as cv
import math
# Load the frontal-face cascade classifier once, when the module is executed;
# find_faces() reuses this single instance for every image.
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory -- confirm where the script is expected to run.
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml')
# Keys of the per-image dictionaries stored as the values of images_dict.
FULL_PATH_KEY = "full_path"  # path returned by ZipFile.extract() for the member
STRING_FOUND_KEY = "string_found"  # list holding the search string when OCR found it
FACE_BOXES_KEY = "face_boxes"  # bounding boxes returned by find_faces()
FACE_IMAGES_KEY = "face_images"  # list of face sub-images cropped from the image
def extract_image_filenames(zip_filename):
    """Extract the files of a zip archive and index the extracted paths.

    Every file member is extracted with ``ZipFile.extract`` using its default
    target, the current working directory.  Directory entries are skipped:
    they are not images, and recording them would make the later
    ``Image.open`` call in the text search fail on a directory path.

    Args:
        zip_filename: Path of the zip archive to read.

    Returns:
        A dict keyed by each member's name inside the archive.  Each value is
        a dict holding the extracted file's path under ``FULL_PATH_KEY``.
    """
    images_dict = {}  # Data for all of the images, keyed by archive member name.
    with zipfile.ZipFile(zip_filename, mode="r") as image_archive:
        for member in image_archive.infolist():
            if member.is_dir():
                # Folder entries carry no image data; parent folders of the
                # files are still created by extract() when needed.
                continue
            images_dict[member.filename] = {
                FULL_PATH_KEY: image_archive.extract(member),
            }
    return images_dict
def search_for_str(images_dict, search_str):
    """Run OCR over every image and record whether the text occurs in it.

    For each entry, the image at ``FULL_PATH_KEY`` is opened and passed to
    ``pytesseract.image_to_string``.  The entry's ``STRING_FOUND_KEY`` is set
    to ``[search_str]`` when the text is a substring of the OCR output and to
    an empty list otherwise.  A progress line is printed per file.

    Args:
        images_dict: Dict of per-image dicts; updated in place.
        search_str: The text to look for (case-sensitive substring match).

    Returns:
        The same ``images_dict`` object that was passed in.
    """
    for name, entry in images_dict.items():
        print("Searching for string '{}' in file {}".format(search_str, name))
        source_path = entry[FULL_PATH_KEY]
        with Image.open(source_path) as picture:
            # Publish the (still empty) result list before running OCR.
            hits = entry[STRING_FOUND_KEY] = []
            ocr_text = pytesseract.image_to_string(picture)
            if search_str in ocr_text:
                hits.append(search_str)
    return images_dict
def find_faces(img_filename):
    """Detect faces in an image file and return their bounding boxes.

    Args:
        img_filename: Path of the image file to read with ``cv.imread``.

    Returns:
        Whatever ``face_cascade.detectMultiScale`` returns for the image;
        callers in this file unpack each item as
        ``(left, upper, width, height)``.
    """
    # Detector tuning, arrived at through trial and error:
    #   - minSize=(100, 100) was tried and later removed
    #   - scaleFactor was raised from 1.25 to 1.35
    #   - minNeighbors=5 was added
    detector_settings = {"scaleFactor": 1.35, "minNeighbors": 5}
    pixels = cv.imread(img_filename)
    return face_cascade.detectMultiScale(pixels, **detector_settings)
# I tried using conversions to greyscale and binarization but did not really
# get any better results generally over the set of images in the images.zip archive.
def get_face_images(images_dict):
    """Crop the detected faces out of every image that matched the search.

    Entries whose ``STRING_FOUND_KEY`` list is empty get empty lists under
    both ``FACE_BOXES_KEY`` and ``FACE_IMAGES_KEY``, so every entry ends up
    with the same set of keys.  For matching entries the face bounding boxes
    are stored under ``FACE_BOXES_KEY`` and the cropped sub-images under
    ``FACE_IMAGES_KEY``.

    The image is read from the path stored under ``FULL_PATH_KEY`` (the
    location the file was actually extracted to), not from the dict key,
    which is only the member's name inside the archive.

    Args:
        images_dict: Dict of per-image dicts that already carry
            ``FULL_PATH_KEY`` and ``STRING_FOUND_KEY``; updated in place.

    Returns:
        The same ``images_dict`` object that was passed in.
    """
    for img_data in images_dict.values():
        if not img_data[STRING_FOUND_KEY]:
            img_data[FACE_BOXES_KEY] = []
            img_data[FACE_IMAGES_KEY] = []
            continue
        full_path = img_data[FULL_PATH_KEY]
        face_boxes = find_faces(full_path)
        img_data[FACE_BOXES_KEY] = face_boxes
        with Image.open(full_path) as img:
            # Boxes are (left, upper, width, height); crop() wants the
            # right/lower edges instead of the size.
            img_data[FACE_IMAGES_KEY] = [
                img.crop(box=(left, upper, left + width, upper + height))
                for left, upper, width, height in face_boxes
            ]
    return images_dict
def make_contact_sheet(face_images):
    """Tile face images into a grid that is five cells wide.

    Each cell is 100 x 100 pixels.  A face that is taller or wider than a
    cell is resized to exactly the cell size; smaller faces are pasted
    unchanged at the top-left corner of their cell.  Cells are filled left
    to right, top to bottom.

    Args:
        face_images: Non-empty sequence of PIL images; the first image's
            mode is used for the sheet.

    Returns:
        A new PIL image containing all of the faces.
    """
    cell_w, cell_h = 100, 100  # Pixel dimensions of one grid cell.
    columns = 5  # Five faces across in each row.
    rows = math.ceil(len(face_images) / columns)
    sheet = Image.new(face_images[0].mode, (columns * cell_w, rows * cell_h))
    for position, thumb in enumerate(face_images):
        if thumb.height > cell_h or thumb.width > cell_w:
            thumb = thumb.resize((cell_w, cell_h))
        row, column = divmod(position, columns)
        sheet.paste(thumb, (column * cell_w, row * cell_h))
    return sheet
def show_results(images_dict):
    """Report every image that matched and show a contact sheet of its faces.

    Entries with an empty ``STRING_FOUND_KEY`` list are skipped.  For each
    remaining entry a result line is printed; then either a contact sheet of
    its ``FACE_IMAGES_KEY`` images is shown, or a message says that no faces
    were found.

    Args:
        images_dict: Dict of per-image dicts carrying ``STRING_FOUND_KEY``
            and, for matching entries, ``FACE_IMAGES_KEY``.
    """
    for name, entry in images_dict.items():
        matches = entry[STRING_FOUND_KEY]
        if not matches:
            continue
        print("\n Result '{}' found in file {}".format(matches[0], name))
        faces = entry[FACE_IMAGES_KEY]
        if not faces:
            print("But there were no faces found in that file!")
            continue
        # Inside a Jupyter notebook, display(sheet) can be used instead of
        # show(), which opens the image in a pop-up window.
        sheet = make_contact_sheet(faces)
        sheet.show()
## ========== Driver code here ====================
import datetime

# Archive to process; the alternatives are kept for quick switching.
zip_filename = "readonly/small_img.zip"  # Four images
##zip_filename = "readonly/smallest.zip" # Just one image
##zip_filename = "readonly/images.zip"

print("\n Started run @ {} \n".format(datetime.datetime.now()))

# Each stage returns the dictionary it was given, so the pipeline is threaded
# through the return values.
images_dict = extract_image_filenames(zip_filename)
images_dict = search_for_str(images_dict, "Mark")  # Either "Christopher" or "Mark" or whatever
images_dict = get_face_images(images_dict)
show_results(images_dict)

print("\n Completed run @ {} \n".format(datetime.datetime.now()))