Skip to content

Commit

Permalink
Merge pull request #220 from tukcomCD2024/AI#36/Preprocessing
Browse files Browse the repository at this point in the history
Ai#36/preprocessing
  • Loading branch information
entellaKa authored May 21, 2024
2 parents edb912b + 168400d commit b745178
Show file tree
Hide file tree
Showing 581 changed files with 30,095 additions and 88 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@


*.h5
.env

backend/shareNote/src/main/resources/application-email.yml
*.h5

8 changes: 8 additions & 0 deletions AI/.idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions AI/.idea/.name

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions AI/.idea/AI.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions AI/.idea/codeStyles/codeStyleConfig.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 28 additions & 28 deletions AI/.idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion AI/.idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions AI/.idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2,766 changes: 2,737 additions & 29 deletions AI/.idea/workspace.xml

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions AI/Model/ImageDataArrange.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import shutil


def getImageListByHash(path):
    """Recursively describe the files stored under ``path``.

    Sub-directories are gathered into one dict (directory name -> nested
    result) that is placed first in the result; plain files follow by name.

    Return shape:
      * empty directory                  -> ``[]``
      * only sub-directories             -> the dict itself
      * exactly one file, no directories -> that file name as a bare ``str``
      * anything else                    -> ``[dict (if any), name, name, ...]``

    Args:
        path: Directory to scan.
    """
    subdirs = {}
    files = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # result is reproducible across runs and platforms.
    for entry in sorted(os.listdir(path)):
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            subdirs[entry] = getImageListByHash(child)
        else:
            files.append(entry)

    img = ([subdirs] if subdirs else []) + files
    # A result holding a single element is unwrapped to that element.
    if len(img) == 1:
        img = img[0]
    return img


# print(getImageListByHash('../asset/image/sample/svg/'))

def imageCategorical(src='C:/Users/Ka/Desktop/Ka/programming/AI/AI2/asset/size224Image01'):
    """Move every file under ``src`` into a folder named after the file.

    ``<dir>/cat.png`` becomes ``<dir>/cat/cat.png``, giving each image its own
    sub-folder. The directory tree is walked top-down; folders created during
    the walk are not visited.

    Args:
        src: Root directory to reorganise. Defaults to the path the script
            was originally written against.
    """
    for routes, dirs, files in os.walk(src):
        print(routes, files, dirs)
        for image in files:
            # splitext() handles any extension length; slicing off the last
            # four characters only worked for three-letter extensions.
            stem, ext = os.path.splitext(image)
            if not ext:
                # Without an extension the target folder would need the
                # same name as the file itself.
                continue
            if os.path.basename(routes) == stem:
                # Already sitting in its own folder (e.g. on a second run).
                continue
            target = os.path.join(routes, stem)
            os.makedirs(target, exist_ok=True)
            shutil.move(os.path.join(routes, image), os.path.join(target, image))


if __name__ == "__main__":
    # Reorganise the image tree only when executed as a script; merely
    # importing this module must not move files on disk.
    imageCategorical()
54 changes: 54 additions & 0 deletions AI/Model/ImageDataGenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pylab as plt

# Root folder of the image assets (machine-specific absolute path).
data_file_path = r"C:\Users\Ka\Desktop\Ka\대학교\졸업작품\project\Code-Bridge\AI\asset\image"

# Fraction of the images held out for validation. Both generators below must
# use the same value so that 'training' and 'validation' partition the data.
validation_fraction = 0.1

# Augmenting generator: used for the training subset only.
train_data_generator = ImageDataGenerator(
    rescale=1. / 255,  # scale pixel values by 1/255
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0

    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    zoom_range=[0.7, 1.1],  # random zoom factor range
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False,  # no vertical flipping
    validation_split=validation_fraction)  # fraction reserved for validation

# Plain generator (rescale only): validation images must not be augmented,
# otherwise the validation metrics are measured on distorted inputs.
test_data_generator = ImageDataGenerator(
    rescale=1. / 255,
    validation_split=validation_fraction)

train_generator = train_data_generator.flow_from_directory(
    data_file_path + "/animals",
    target_size=(224, 224),
    batch_size=3,
    class_mode='categorical',
    subset='training')

# Previously this subset was drawn from train_data_generator, which applied
# the random shifts/zoom/flips to the validation images as well.
test_generator = test_data_generator.flow_from_directory(
    data_file_path + "/animals",
    target_size=(224, 224),
    batch_size=3,
    class_mode='categorical',
    subset='validation')

# x_train, y_train = train_generator.next()
# for idx in range(len(x_train)):
# print(x_train[idx].shape)
# print(y_train[idx])
# plt.imshow(x_train[idx])
# plt.show()
#
# print(len(x_train))
# print(train_generator.n)
#
# for i in range(40):
# x_train, y_train = train_generator.next()
# for idx in range(len(x_train)):
# # print(x_train[idx].shape)
# # print(y_train[idx])
# plt.imshow(x_train[idx])
# plt.show()
Binary file not shown.
99 changes: 99 additions & 0 deletions AI/Model/cnnModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import RMSprop
import os

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Edge length (pixels) of the square images fed to the networks.
imageSize = 128

# Directory holding one sub-folder per class (machine-specific absolute path).
dataDir = r"C:\Users\Ka\Desktop\Ka\대학교\졸업작품\project\Code-Bridge\AI\asset\image\svg"

# Augmented stream used for training.
# NOTE(review): validation_split is configured but no `subset` is requested
# below, so traindata covers every image and testdata reads the very same
# directory; validation is therefore measured on the training images.
# NOTE(review): rotation_range is expressed in degrees, so 0.3 is a very
# small rotation; confirm this is intended.
trdata = ImageDataGenerator(
    zoom_range=[0.9, 1.3],
    height_shift_range=0.3,
    width_shift_range=0.2,
    horizontal_flip=True,
    rotation_range=0.3,
    validation_split=0.1,
)
traindata = trdata.flow_from_directory(
    directory=dataDir,
    target_size=(imageSize, imageSize),
    class_mode='categorical',
    batch_size=20,
)

# Un-augmented stream over the same directory, used as validation data.
tsdata = ImageDataGenerator()
testdata = tsdata.flow_from_directory(
    directory=dataDir,
    target_size=(imageSize, imageSize),
    class_mode='categorical',
)


def _buildCnn(convFilters, denseLayers, numClasses):
    """Assemble conv/max-pool stages, hidden dense layers and a softmax head.

    Args:
        convFilters: Filter count of each 3x3 Conv2D stage, in order; every
            stage is followed by a 2x2 max-pool.
        denseLayers: Number of 1024-unit ReLU layers after flattening.
        numClasses: Width of the output layer.
    """
    model = Sequential()
    for stage, filters in enumerate(convFilters):
        if stage == 0:
            # Only the first layer declares the input shape.
            model.add(Conv2D(filters, (3, 3), activation='relu', input_shape=(imageSize, imageSize, 3)))
        else:
            model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPool2D((2, 2)))

    model.add(Flatten())
    for _ in range(denseLayers):
        model.add(Dense(units=1024, activation="relu"))
    # The models are trained with categorical_crossentropy on one-hot labels,
    # which expects a probability distribution over the classes: use softmax
    # rather than independent sigmoids.
    model.add(Dense(units=numClasses, activation="softmax"))

    return model


def cnnDepth4(numClasses=103):
    """Return an un-compiled CNN: 2 conv stages (32, 64), 1 hidden dense layer, output layer."""
    return _buildCnn((32, 64), 1, numClasses)


def cnnDepth5(numClasses=103):
    """Return an un-compiled CNN: 2 conv stages (32, 64), 2 hidden dense layers, output layer."""
    return _buildCnn((32, 64), 2, numClasses)


def cnnDepth6(numClasses=103):
    """Return an un-compiled CNN: 3 conv stages (32, 64, 64), 2 hidden dense layers, output layer."""
    return _buildCnn((32, 64, 64), 2, numClasses)


def createModel(model, lr, e1, name):
    """Compile ``model``, train it on the module-level generators and plot the run.

    Args:
        model: Un-compiled Keras model, e.g. the result of cnnDepth4/5/6.
        lr: Learning rate handed to RMSprop.
        e1: Maximum number of training epochs.
        name: File the best model (by ``val_accuracy``) is checkpointed to.

    Returns:
        The history object returned by ``model.fit``.
    """
    # `learning_rate` replaces the deprecated `lr` keyword.
    opt = RMSprop(learning_rate=lr)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()

    # Checkpointing every epoch is the default, so the deprecated `period=1`
    # argument is not needed.
    checkpoint = ModelCheckpoint(name, monitor='val_accuracy', verbose=1, save_best_only=True,
                                 save_weights_only=False, mode='auto')
    early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=30, verbose=1, mode='auto')

    # The generators already yield batches, so no batch_size is passed here.
    hist = model.fit(traindata, steps_per_epoch=len(traindata), validation_data=testdata,
                     validation_steps=len(testdata), epochs=e1, callbacks=[checkpoint, early])

    import matplotlib.pyplot as plt

    plt.plot(hist.history["accuracy"])
    plt.plot(hist.history['val_accuracy'])
    plt.plot(hist.history['val_loss'])
    plt.title("model accuracy")
    plt.ylabel("Accuracy")
    plt.xlabel("Epoch")
    plt.ylim(0, 5)
    plt.legend(["Accuracy", "Validation Accuracy", "Validation Loss"])
    plt.show()

    return hist

if __name__ == "__main__":
    # Train only when run as a script, so importing this module does not
    # start a multi-hour sweep.
    # (depth tag used in the checkpoint name, model factory), in run order.
    builders = ((6, cnnDepth6), (5, cnnDepth5), (4, cnnDepth4))
    for e in range(40, 110, 10):
        for depth, build in builders:
            createModel(build(), 0.0001, e, f'cnn{depth}f32e{e}u1024.h5')
47 changes: 47 additions & 0 deletions AI/Model/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
from PIL import Image

import numpy as np


def resultByDesc(result, *, threshold=0.001, limit=40):
    """Return the strongest entries of ``result`` as ``(index, value)`` pairs.

    Args:
        result: Sequence of scores, indexed by class.
        threshold: Only values strictly greater than this are kept.
        limit: Maximum number of pairs returned.

    Returns:
        List of ``(index, value)`` tuples sorted by value, highest first;
        equal values keep their original index order.
    """
    kept = [(index, value) for index, value in enumerate(result) if value > threshold]
    kept.sort(key=lambda pair: pair[1], reverse=True)
    return kept[:limit]


# imgsrc = r"C:\Users\Ka\Desktop\Ka\programming\AI\AI2\asset\size64Image01\bank\bank.png"
# imgsrc = r"C:\Users\Ka\Desktop\Ka\programming\AI\AI2\asset\size64Image01\security\security.png"
# Path template of the sample images; `{}` is filled with a key of `images`.
imgsrc = r"C:\Users\Ka\Desktop\Ka\programming\AI\sample\{}.png"
# Sample image name -> class index the model is expected to rank highly.
images = {'arrow': 4, 'heart': 51, 'heart2': 51, 'cloud': 28, 'sword': 91, 'skirt': 83, 'star': 87, 'star2': 87, 'bread':17, 'bread2':17, 'candy':26}

from keras.models import load_model

# Evaluate every saved model in the working directory whose file name
# contains 'f32' (e.g. cnn6f32e40u1024.h5).
for modelFile in os.listdir('./'):
    if 'f32' not in modelFile:
        continue
    print(modelFile)

    saved_model = load_model(modelFile)

    for sampleName, expectedClass in images.items():
        # Close the image file as soon as the resized RGB copy exists.
        with Image.open(imgsrc.format(sampleName)) as opened:
            img = opened.resize((128, 128)).convert("RGB")
        # Add the leading batch axis expected by predict().
        img = np.expand_dims(np.array(img), axis=0)

        output = saved_model.predict(img)

        result = resultByDesc(output[0])[:8]
        # Report the rank (0-4) at which the expected class shows up. Compare
        # against the class index only: a membership test on the whole
        # (index, score) pair could also match the score.
        for rank, (classIndex, _score) in enumerate(result[:5]):
            if classIndex == expectedClass:
                print(sampleName, rank)
                break
46 changes: 46 additions & 0 deletions AI/Preprocessing/CheckImageExist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import csv
import chromedriver_autoinstaller as cd_installer
from selenium import webdriver
from selenium.webdriver.common.by import By

# Set up chromedriver before webdriver.Chrome() is created further down.
# NOTE(review): this runs at import time and presumably needs network access
# to fetch a driver matching the local Chrome; confirm against the
# chromedriver_autoinstaller documentation.
cd_installer.install()


def getReaderTXT(url, token):
    """Return the first line of the text file at ``url`` split on ``token``.

    Only the first line is read. Its trailing newline is removed so the last
    item does not carry a stray newline character.

    Args:
        url: Path of the text file.
        token: Separator between items, e.g. ``', '``.

    Returns:
        List of items, or ``[]`` when the first line is empty.
    """
    with open(url) as f:
        firstLine = f.readline().rstrip('\n')
    if not firstLine:
        return []
    return firstLine.split(token)


def getReaderCSV(url):
    """Read the CSV file at ``url`` and return its rows.

    The rows are materialised into a list so the file can be closed before
    returning, and so the result supports ``len()`` and repeated iteration,
    both of which ``availableTag`` relies on.

    Args:
        url: Path of the CSV file.

    Returns:
        List of rows, each a list of strings.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(url, newline='') as f:
        return list(csv.reader(f))

def availableTag(reader):
    """Check each tag against the Iconify search page and write the tag files.

    Step 1 opens the search page for every item of ``reader`` and writes the
    item to ``../asset/tag/tagsFinal5.txt`` unless the element at
    ``checkMsgPath`` reads 'No icon sets match your search'.
    Step 2 writes ``../asset/tag/sampleTags.csv`` with the rows of ``reader``
    whose first field is listed in ``../asset/tag/sampleTag.txt``.

    Args:
        reader: Sized sequence that can be iterated twice (``len()`` is
            called on it and it is looped over in both steps).
            NOTE(review): step 1 uses each item as a string while step 2
            indexes ``row[0]`` / ``row[1]``; confirm the intended row shape.
    """
    checkMsgPath = '//*[@id="app"]/div[2]/div/div/div[2]/div[1]'

    print(len(reader))
    driver = webdriver.Chrome()
    try:
        # The implicit wait is a session-wide setting, so set it once.
        driver.implicitly_wait(200)
        with open('../asset/tag/tagsFinal5.txt', 'w') as found:
            for row in reader:
                driver.get(f"https://icon-sets.iconify.design/?query={row}")
                try:
                    el = driver.find_element(By.XPATH, checkMsgPath)
                    if el.text != 'No icon sets match your search':
                        found.write(row + "\n")
                except Exception as e:
                    # Best effort: keep going, but report the skipped tag
                    # instead of swallowing the error silently.
                    print(f"skipped {row!r}: {e}")
    finally:
        # Always shut the browser down, even if a page load fails.
        driver.quit()

    with open('../asset/tag/sampleTag.txt', 'r') as f:
        # rstrip instead of [:-1] so a final line without a newline keeps
        # its last character.
        availableTags = {line.rstrip('\n') for line in f}

    with open('../asset/tag/sampleTags.csv', 'w', newline='\n') as out:
        writer = csv.DictWriter(out, fieldnames=['tag', 'count'])
        writer.writeheader()
        for row in reader:
            if row[0] in availableTags:
                writer.writerow({'tag': row[0], 'count': row[1]})


if __name__ == "__main__":
    # Launch the browser-driven tag check only when executed as a script,
    # not when this module is imported.
    availableTag(getReaderTXT('../asset/tag/sampleTag.txt', ', '))
Loading

0 comments on commit b745178

Please sign in to comment.