-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_detection.py
62 lines (45 loc) · 1.4 KB
/
feature_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import cv2
import numpy as np
import os
import pandas as pd
import csv
from sklearn.cluster import MiniBatchKMeans
from sklearn.neural_network import MLPClassifier
# Location of the leaf images and of the training table. The table is read
# through its `id` and `species` columns below.
img_path = 'input/images/'
train = pd.read_csv('input/train.csv')
species = train.species.sort_values().unique()

# SIFT is exposed as cv2.SIFT_create in recent OpenCV builds and only through
# the xfeatures2d contrib module in older ones; use whichever is present.
# (The original author also experimented with cv2.ORB_create().)
if hasattr(cv2, 'SIFT_create'):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

# Pool the descriptors of every training image; this pool is what the visual
# vocabulary is clustered from afterwards.
dico = []
for leaf in train.id:
    print ("leaf",leaf)
    img = cv2.imread(img_path + str(leaf) + ".jpg")
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with a readable message instead of an OpenCV assertion.
        raise IOError("could not read image " + img_path + str(leaf) + ".jpg")
    kp, des = sift.detectAndCompute(img, None)
    if des is None:
        # No keypoints were detected, so there is nothing to add to the pool.
        continue
    dico.extend(des)
# Vocabulary size: ten visual words for each distinct species.
k = 10 * len(species)
# Mini-batch size: three times the number of entries in the image folder.
batch_size = 3 * len(os.listdir(img_path))
# Cluster the pooled descriptors; progress logging is only wanted while
# fitting, so it is switched off again once the model is trained.
kmeans = MiniBatchKMeans(n_clusters=k, batch_size=batch_size, verbose=1)
kmeans.fit(dico)
kmeans.verbose = False
# Build one normalized bag-of-visual-words histogram per training leaf and
# write all of them to a CSV file.
histo_list = []
# .values replaces DataFrame.as_matrix(), which pandas removed in 1.0.
# Assumes the first two columns of the table are the leaf id and the species.
train = train.values
for item in train:
    leaf = item[0]
    print ('leaf', leaf)
    species = item[1]
    img = cv2.imread(img_path + str(leaf) + ".jpg")
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError("could not read image " + img_path + str(leaf) + ".jpg")
    kp, des = sift.detectAndCompute(img, None)
    # The k + 2 width is kept from the original output layout; cluster ids
    # are always < k, so the two trailing slots stay at 0.
    histo = np.zeros(k+2)
    nkp = len(kp)
    if des is not None and nkp > 0:
        # Assign every descriptor to its visual word in one call, count the
        # words, and divide by the keypoint count to normalize the histogram.
        words = kmeans.predict(des)
        histo[:k] = np.bincount(words, minlength=k) / float(nkp)
    # An image without keypoints keeps an all-zero histogram.
    histo_list.append([leaf, species] + histo.tolist())
# Build the frame straight from the rows so ids, labels and frequencies keep
# their own types instead of all being coerced to strings by np.array.
X = pd.DataFrame(histo_list)
Y = []  # unused here; kept so the module still defines the same names
X.to_csv("train_visord-orb.csv")