Link dataset: Indian Coin Denomination Dataset (ICDD) (kaggle.com)
Em đã cắt ảnh và chuẩn hóa về 300x300 nhờ Otsu và fitEllipse. Có 128/900 bức bị xóa thủ công do không thể cắt (22%). Code tiền xử lý:
import cv2
import numpy as np
import copy
import math
def get_to_edge(start, direction, threshold, h_frame, w_frame):
    """Walk from ``start`` along ``direction`` until black pixels are found.

    At every step a rectangular window placed relative to the current
    position is inspected. The walk stops at the first position whose
    window contains at least three zero-valued pixels of ``threshold``.

    The window is clipped to the image before counting, so pixels that
    would lie outside the image are never counted as black and negative
    offsets never wrap around to the opposite border.

    Args:
        start: ``(row, col)`` position where the walk begins.
        direction: ``(d_row, d_col)`` step added to the position each
            iteration.
        threshold: 2-D binary image; zero-valued pixels count as black.
        h_frame: ``(lo, hi)`` row offsets; the window covers rows
            ``[row + lo, row + hi)``.
        w_frame: ``(lo, hi)`` column offsets; the window covers columns
            ``[col + lo, col + hi)``.

    Returns:
        ``(row, col)`` of the first position whose window holds at least
        three black pixels. If the walk leaves the image without finding
        any, the position clamped back onto the image border is returned.
    """
    height, width = threshold.shape[0], threshold.shape[1]
    row, col = start
    step_row, step_col = direction

    while 0 <= row < height and 0 <= col < width:
        # Clip the inspection window to the part that lies inside the image.
        top = max(row + h_frame[0], 0)
        bottom = min(row + h_frame[1], height)
        left = max(col + w_frame[0], 0)
        right = min(col + w_frame[1], width)

        if bottom > top and right > left:
            window = threshold[top:bottom, left:right]
            area = (bottom - top) * (right - left)
            black_cnt = area - np.count_nonzero(window)
            if black_cnt >= 3:
                break

        if step_row == 0 and step_col == 0:
            # A zero step can never reach new pixels; stop instead of spinning.
            break

        row += step_row
        col += step_col

    # No-op when the loop stopped on a hit; otherwise pull the point back
    # onto the image so callers can use it as an index.
    row = min(max(row, 0), height - 1)
    col = min(max(col, 0), width - 1)
    return (row, col)
def get_bound(threshold):
    """Locate the dark object in a binary image and fit an ellipse to it.

    Six probes are sent inwards with ``get_to_edge``: one from the middle
    of each image side and two along the ``(3, 4)`` diagonal from opposite
    corners. The six stopping points are then passed to
    ``cv2.fitEllipse``.

    Args:
        threshold: 2-D binary image in which the object is black (zero).

    Returns:
        A 5-tuple ``(h_start, h_end, w_start, w_end, ellipse)``: the row
        where the top probe stopped, the row where the bottom probe
        stopped, the column where the left probe stopped, the column
        where the right probe stopped, and the value returned by
        ``cv2.fitEllipse`` for the six points.
    """
    height, width = threshold.shape[0], threshold.shape[1]
    mid_row, mid_col = height // 2, width // 2
    last_row, last_col = height - 1, width - 1

    top = get_to_edge((0, mid_col), (1, 0), threshold,
                      (0, 20), (-5, 5))
    bottom = get_to_edge((last_row, mid_col), (-1, 0), threshold,
                         (-20, 0), (-5, 5))
    left = get_to_edge((mid_row, 0), (0, 1), threshold,
                       (-5, 5), (0, 20))
    right = get_to_edge((mid_row, last_col), (0, -1), threshold,
                        (-5, 5), (-20, 0))
    diag_near = get_to_edge((0, 0), (3, 4), threshold, (0, 20), (0, 20))
    # NOTE(review): this probe uses a 10x10 window while the others use
    # 20-pixel frames; kept as in the original, confirm it is intended.
    diag_far = get_to_edge((last_row, last_col), (-3, -4), threshold,
                           (-10, 0), (-10, 0))

    probes = (top, bottom, left, right, diag_near, diag_far)
    # Probes are (row, col); reverse each to (x, y) for OpenCV. The dtype
    # is pinned to int32 because the platform default integer may be
    # 64-bit, which fitEllipse does not accept.
    points = np.array([p[::-1] for p in probes], dtype=np.int32)

    return top[0], bottom[0], left[1], right[1], cv2.fitEllipse(points)
def to_int_tup(tup):
    """Return the first two items of ``tup`` truncated to ``int``.

    Args:
        tup: Indexable with at least two numeric items.

    Returns:
        ``(int(tup[0]), int(tup[1]))``.
    """
    first, second = tup[0], tup[1]
    return (int(first), int(second))
def get_axes_length(tup):
    """Halve the first two items of ``tup`` and truncate them to ``int``.

    Args:
        tup: Indexable with at least two numeric items.

    Returns:
        ``(int(tup[0] / 2), int(tup[1] / 2))``.
    """
    first, second = tup[0], tup[1]
    return (int(first / 2), int(second / 2))
def pre_process(path, win_size=200, norm_size=300):
    """Crop the object out of an image and centre it on a square canvas.

    Steps:
      1. Read the image in colour and in grayscale and binarise the
         grayscale copy with Otsu's threshold.
      2. Obtain the crop bounds and a fitted ellipse from ``get_bound``.
      3. Set to black every pixel of the crop that lies outside the
         ellipse, after enlarging each semi-axis by 10 pixels.
      4. Resize the crop so its longer side equals ``win_size`` and
         zero-pad it to ``norm_size`` x ``norm_size``.

    Side effect: the thresholded image is written to ``thres.png`` in the
    current working directory on every call.

    Args:
        path: Path of the image file to read.
        win_size: Length in pixels of the longer side after resizing.
        norm_size: Side length in pixels of the padded output.

    Returns:
        The padded image as an array of shape
        ``(norm_size, norm_size, channels)``.

    Raises:
        OSError: If the image cannot be read.
        ValueError: If ``win_size`` exceeds ``norm_size`` or the detected
            bounds describe an empty crop.
    """
    if win_size > norm_size:
        raise ValueError("win_size must not exceed norm_size")

    img = cv2.imread(path)
    gray = cv2.imread(path, 0)
    if img is None or gray is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"cannot read image: {path}")

    _, thresholded = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2.imwrite("thres.png", thresholded)

    h_start, h_end, w_start, w_end, params = get_bound(thresholded)
    h, k = params[0]
    a, b = params[1]
    # fitEllipse reports full axis lengths; halve them and add a margin.
    a = a / 2 + 10
    b = b / 2 + 10
    rad = params[2] * math.pi / 180
    cos_r = math.cos(rad)
    sin_r = math.sin(rad)

    crop = img[h_start:h_end, w_start:w_end]
    if crop.size == 0:
        raise ValueError(f"empty crop detected for image: {path}")

    # Evaluate the rotated-ellipse inequality for the whole crop at once
    # instead of looping over pixels in Python.
    ys, xs = np.mgrid[h_start:h_start + crop.shape[0],
                      w_start:w_start + crop.shape[1]]
    dx = xs - h
    dy = ys - k
    first_term = (dx * cos_r + dy * sin_r) ** 2 / a ** 2
    second_term = (dx * sin_r - dy * cos_r) ** 2 / b ** 2
    crop[first_term + second_term > 1] = 0

    rows, cols = crop.shape[0:2]
    if rows > cols:
        # max(1, ...) keeps cv2.resize from receiving a zero dimension.
        cols = max(1, int(cols * (win_size / rows)))
        rows = win_size
    else:
        rows = max(1, int(rows * (win_size / cols)))
        cols = win_size
    resized = cv2.resize(crop, (cols, rows))

    # Split the padding so that any odd pixel goes before the image.
    cols_total = norm_size - cols
    rows_total = norm_size - rows
    cols_padding = ((cols_total + 1) // 2, cols_total // 2)
    rows_padding = ((rows_total + 1) // 2, rows_total // 2)

    # np.pad is the public spelling; np.lib.pad is a legacy alias that
    # newer NumPy releases no longer expose.
    return np.pad(resized, (rows_padding, cols_padding, (0, 0)), 'constant')
import os
# Walk the raw dataset tree and write one pre-processed PNG per input file.
# The variable is not called `dir` so the builtin of that name stays usable.
dataset_dir = r"\Images\Rupee\DataSet"
for dirpath, _dirnames, fnames in os.walk(dataset_dir):
    for f in fnames:
        full_path = os.path.join(dirpath, f)
        processed = pre_process(full_path)
        out_path = fr"Images\Rupee\Processed\{f}.png"
        # cv2.imwrite reports failure through its return value, so check it
        # rather than silently losing the output.
        if not cv2.imwrite(out_path, processed):
            raise OSError(f"could not write {out_path}")
Bước split train/test có stratify:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
# Load every processed image, build one-hot labels from the file-name prefix
# (the text before the first underscore), split 70/30 with stratification and
# store the training split as one .npy file per sample.
dir_name = r"D:\Computer vision\Images\Rupee\Processed"
labels = {'One': 0, 'Two': 1, 'Five': 2, 'Ten': 3, 'Twenty': 4}

# Sorted so the fixed random_state yields the same split on every machine;
# os.listdir returns entries in arbitrary order.
files = sorted(os.listdir(dir_name))

# uint8 matches what cv2.imread returns for 8-bit images and needs an eighth
# of the memory of the default float64 buffer.
X = np.zeros((len(files), 300, 300, 3), dtype=np.uint8)
Y = np.zeros((len(files), 5))

for index, f in enumerate(files):
    full_path = os.path.join(dir_name, f)
    label = labels[f[:f.index('_')]]
    img = cv2.imread(full_path)
    if img is None:
        # cv2.imread returns None for unreadable files instead of raising.
        raise OSError(f"cannot read image: {full_path}")
    X[index] = img
    Y[index][label] = 1

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=104, test_size=0.3, shuffle=True, stratify=Y)

# NOTE(review): only the training split is written here; X_test / y_test are
# not persisted by this snippet. Confirm the test files come from this same
# split and go through identical preprocessing.
for index, (image, target) in enumerate(zip(X_train, y_train)):
    np.save(fr'\Images\Rupee\Train\X\{index}.npy', image)
    np.save(fr'\Images\Rupee\Train\Y\{index}.npy', target)
Kiến trúc CNN em dùng trên dataset đã augmented:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
import keras
# Load the saved training samples, build a small CNN and train it for
# 15 epochs, then save the model to model.keras.
data_size = 554

# float32 halves the memory of the default float64 buffers.
X_train = np.empty((data_size, 300, 300, 3), dtype=np.float32)
y_train = np.empty((data_size, 5), dtype=np.float32)
for index in range(data_size):
    X_train[index] = np.load(fr"/content/Train/X/{index}.npy")
    y_train[index] = np.load(fr"/content/Train/Y/{index}.npy")

# NOTE(review): no transform arguments are passed, so this generator only
# batches and shuffles; it performs no augmentation. Confirm whether
# augmentation was meant to happen here.
datagen = keras.preprocessing.image.ImageDataGenerator()
# Not named `iter`, which would shadow the builtin.
train_batches = datagen.flow(X_train, y_train)

model = Sequential()
# Scale raw 0-255 pixels to 0-1 inside the model, so the saved model carries
# its own normalization: feed it raw 0-255 images at inference time too.
model.add(keras.layers.Rescaling(1.0 / 255, input_shape=(300, 300, 3)))
# convolutional layer
model.add(Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 activation='relu'))
# NOTE(review): the flattened tensor is 150 * 150 * 32 = 720,000 values, so
# the Dense(64) below holds about 46 million weights for 554 training images.
# That is a likely source of overfitting; consider more pooling first.
model.add(Flatten())
# hidden layer
model.add(Dense(64, activation='relu'))
# output layer
model.add(Dense(5, activation='softmax'))

# compiling the sequential model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
              optimizer='adam')
# NOTE(review): no validation data is passed, so overfitting is not visible
# during training.
model.fit(train_batches, epochs=15)
model.save(r'model.keras')
Train 99.46%, trong khi trên test chỉ có 33.33%. Không biết em có làm sai ở công đoạn nào không hay dữ liệu em lụm được là rác :<. Mong mọi người giúp ạ. Một vài ảnh đã qua tiền xử lý: