-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_comparison.py
141 lines (107 loc) · 4.64 KB
/
image_comparison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Inspiration: https://stackoverflow.com/questions/843972/image-comparison-fast-algorithm
import os
import cv2
import time
import numpy as np
from PIL import Image
from typing import Union
import base64
import io
def load_img(path, load_size=250):
pil_img = Image.open(path)
pil_img.draft('RGB', (load_size, load_size)) # Using shrink-on-load to speed up image load times significantly
img = np.array(pil_img, dtype=np.uint8)
return img
def load_img_from_b64(b64_string, load_size=250):
img_data = base64.b64decode(b64_string)
img = Image.open(io.BytesIO(img_data))
# img.thumbnail((load_size, load_size), Image.ANTIALIAS) # Using thumbnail to resize while maintaining aspect ratio
img = np.array(img, dtype=np.uint8)
return img
def bytes_to_image(byte_data):
print(byte_data)
image_stream = io.BytesIO(byte_data)
image = Image.open(image_stream)
img = np.array(image, dtype=np.uint8)
return img
def color_histogram(img_data, bins=8):
"""
Calculate image features based on color distribution.
:param img_data: 3d numpy array of image pixel values
:param bins: number of bins (features) per color
:return: 1d array of feature values. Length based on the bins (bins times 3 because of 3 colors)
"""
# Verify image shape
if len(img_data.shape) != 3:
raise Exception(f"Image has wrong channel dimensions (Needs to be 3, image has {len(img_data.shape)}).")
# Variable inits
channels = ("red", "green", "blue")
num_of_pixels = img_data.shape[0] * img_data.shape[1]
return_data = []
# Get histograms for RGB and normalize
for i, channel in enumerate(channels):
hist_data, _ = np.histogram(img_data[:, :, i], bins=bins)
hist_data_norm = [round(val/num_of_pixels, 4) for val in hist_data] # Normalize
return_data += hist_data_norm
return np.array(return_data)
def edge_histogram(img_data, bins=8):
"""
Calculate image features based on the edge orientation distribution
:param img_data: 3d numpy array of image pixel values
:param bins: number of bins (features)
:return: 1d array of feature values. Length equal to the bins
"""
greyscale_img = cv2.cvtColor(img_data, cv2.COLOR_BGR2GRAY)
# Calculate edges and compute orientations
gx, gy = np.gradient(greyscale_img) # compute the gradients along x and y axes
eo = np.arctan2(gy, gx) # compute the edge orientations in radians
# Calculate histogram values and normalize
hist_data, _ = np.histogram(eo, bins=bins, range=(-np.pi, np.pi))
total_edges = np.sum(hist_data)
hist_data_norm = np.array([round(val/total_edges, 4) for val in hist_data])
return hist_data_norm
def get_img_features(img_data, bins=8, resize: Union[bool, int] = False):
"""
Calculate the image features for one image
:param img_data: 3d numpy array of image pixel values
:param bins: number of features per feature type (eg. color green or edge orientation)
:param resize: False or int. resize image data before computing features (saves a lot of feature compute time)
:return: dictionary of features
"""
return_data = {}
# Resize Image if defined
if resize:
img_data = cv2.resize(img_data, (resize, resize))
try:
# Get color features
hist_data = color_histogram(img_data, bins=bins)
return_data["color"] = hist_data
# Get edge normal orientation features
edge_data = edge_histogram(img_data, bins=bins)
return_data["edge_orientation"] = edge_data
except Exception as e:
# print("(!) Could not calculate features for image:", e)
raise Exception("Could not calculate features for image:", e)
return return_data
def compare_features(features_a, features_b):
"""
Calculate absolute difference between 2 feature arrays
:param features_a: image 1 features
:param features_b: image 2 features
:return: normalized float representing difference score. Lower is more similar
"""
diff = np.absolute(np.array(features_a) - np.array(features_b))
mae = round(np.mean(diff), 4)
return mae
def compare_two_images(image_a_data, image_b_data):
features_diffs = 0
for feature in image_a_data: # TODO Weights and filtering
feature_diff = compare_features(image_a_data[feature], image_b_data[feature])
features_diffs += feature_diff
# Calc total avg difference for all features of 2 images
try:
total_diff = round(features_diffs / len(image_a_data), 4)
except ZeroDivisionError:
total_diff = 0
similarity = 1 - total_diff # Flip to more understandable similarity percentage
return similarity