Dữ liệu về coin rupee trên Kaggle bị overfit

anonymous280 · March 20, 2024, 2:36am

Link dataset: Indian Coin Denomination Dataset (ICDD) (kaggle.com)

Em đã cắt ảnh và chuẩn hóa về 300x300 nhờ Otsu và fitEllipse. Có 128/900 bức bị xóa thủ công do không thể cắt (khoảng 14%). Code tiền xử lý:

import cv2
import numpy as np
import copy
import math
def get_to_edge(start, direction, threshold, h_frame, w_frame):
    """Step from `start` along `direction` until the probe window holds black pixels.

    At each position (row, col) a window
    ``threshold[row + h_frame[0]: row + h_frame[1], col + w_frame[0]: col + w_frame[1]]``
    is inspected. The walk stops at the first position where the nominal
    window area minus the number of non-zero pixels is at least 3.

    Args:
        start: (row, col) position where the walk begins.
        direction: (d_row, d_col) increment applied after each failed probe.
        threshold: 2-D array; zero entries are counted as black.
        h_frame: (low, high) row offsets of the probe window.
        w_frame: (low, high) column offsets of the probe window.

    Returns:
        The (row, col) tuple at which the walk stopped.
    """
    row, col = start[0], start[1]
    # The area is computed from the offsets, not from the actual slice, so a
    # window truncated by the array border also counts its missing pixels as black.
    nominal_area = (w_frame[1] - w_frame[0]) * (h_frame[1] - h_frame[0])

    def black_pixels(r, c):
        window = threshold[r + h_frame[0]: r + h_frame[1],
                           c + w_frame[0]: c + w_frame[1]]
        return nominal_area - np.count_nonzero(window)

    while black_pixels(row, col) < 3:
        row += direction[0]
        col += direction[1]
    return (row, col)
def get_bound(threshold):
    """Locate six edge points in a thresholded image and fit an ellipse to them.

    Six walks are started with `get_to_edge`: from the middle of the top,
    bottom, left and right borders moving inwards, and from the top-left and
    bottom-right corners moving diagonally in steps of (3, 4).

    Args:
        threshold: 2-D binary image (as produced by `cv2.threshold`).

    Returns:
        A 5-tuple ``(h_start, h_end, w_start, w_end, ellipse)`` where the
        first four values are the row reached from the top, the row reached
        from the bottom, the column reached from the left and the column
        reached from the right, and ``ellipse`` is the result of
        `cv2.fitEllipse` on the six points given in (x, y) order.
    """
    n_rows, n_cols = threshold.shape[0], threshold.shape[1]
    mid_row, mid_col = n_rows // 2, n_cols // 2

    p1 = get_to_edge((0, mid_col), (1, 0), threshold, (0, 20), (-5, 5))
    p2 = get_to_edge((n_rows - 1, mid_col), (-1, 0), threshold, (-20, 0), (-5, 5))
    p3 = get_to_edge((mid_row, 0), (0, 1), threshold, (-5, 5), (0, 20))
    p4 = get_to_edge((mid_row, n_cols - 1), (0, -1), threshold, (-5, 5), (-20, 0))
    p5 = get_to_edge((0, 0), (3, 4), threshold, (0, 20), (0, 20))
    # NOTE(review): this diagonal probe uses a 10x10 window while the opposite
    # one (p5) uses 20x20 -- kept as in the original; confirm it is intended.
    p6 = get_to_edge((n_rows - 1, n_cols - 1), (-3, -4), threshold,
                     (-10, 0), (-10, 0))

    # get_to_edge returns (row, col); OpenCV expects (x, y), hence the reversal.
    # cv2.fitEllipse only accepts int32/float32 point sets, and a plain
    # np.array of Python ints is int64 on many platforms, so force the dtype.
    points_xy = np.array(
        [p[::-1] for p in (p1, p2, p3, p4, p5, p6)],
        dtype=np.int32,
    )
    return p1[0], p2[0], p3[1], p4[1], cv2.fitEllipse(points_xy)
def to_int_tup(tup):
    """Return the first two entries of `tup` as a pair of ints (truncating)."""
    first = int(tup[0])
    second = int(tup[1])
    return first, second
def get_axes_length(tup):
    """Halve the first two entries of `tup` and truncate each half to an int."""
    half_first = tup[0] / 2
    half_second = tup[1] / 2
    return int(half_first), int(half_second)
def pre_process(path):
    """Crop the object out of an image, mask its surroundings and pad to 300x300.

    Steps:
      1. Read the image in colour and in grayscale, and binarize the
         grayscale copy with Otsu thresholding.
      2. Use `get_bound` to obtain the bounding rows/columns and a fitted
         ellipse.
      3. Inside the bounding box, set every pixel lying outside the ellipse
         (with both semi-axes enlarged by 10 px) to black.
      4. Crop to the bounding box, resize so the longer side is 200 px
         (aspect ratio kept) and zero-pad to 300x300.

    Args:
        path: Path of the image file to read.

    Returns:
        A 300x300x3 array containing the centred, padded crop.

    Raises:
        ValueError: If the image cannot be read or the detected bounding
            box is empty.
    """
    img = cv2.imread(path)
    gray = cv2.imread(path, 0)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None or gray is None:
        raise ValueError(f"cannot read image: {path}")

    _, thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # Debug output; the same file is overwritten on every call.
    cv2.imwrite("thres.png", thresholded)

    h_start, h_end, w_start, w_end, params = get_bound(thresholded)
    h, k = params[0]            # ellipse centre (x, y)
    a, b = params[1]            # full axis lengths
    a = a / 2 + 10              # semi-axes plus a 10 px safety margin
    b = b / 2 + 10
    rad = params[2] * math.pi / 180

    img = img[h_start:h_end, w_start:w_end]
    rows, cols = img.shape[0:2]
    if rows == 0 or cols == 0:
        raise ValueError(f"empty bounding box detected for image: {path}")

    # Vectorized rotated-ellipse test, evaluated in full-image coordinates.
    ys, xs = np.ogrid[h_start:h_start + rows, w_start:w_start + cols]
    dx = xs - h
    dy = ys - k
    cos_t, sin_t = math.cos(rad), math.sin(rad)
    first_term = (dx * cos_t + dy * sin_t) ** 2 / a ** 2
    second_term = (dx * sin_t - dy * cos_t) ** 2 / b ** 2
    img[(first_term + second_term) > 1] = (0, 0, 0)

    win_size = 200
    norm_size = 300
    # Scale so that the longer side becomes win_size.
    if rows > cols:
        cols = int(cols * (win_size / rows))
        rows = win_size
    else:
        rows = int(rows * (win_size / cols))
        cols = win_size
    img = cv2.resize(img, (cols, rows))

    # Split the remaining space as evenly as possible (extra pixel goes first).
    cols_padding = (int(math.ceil((norm_size - cols) / 2.0)),
                    int(math.floor((norm_size - cols) / 2.0)))
    rows_padding = (int(math.ceil((norm_size - rows) / 2.0)),
                    int(math.floor((norm_size - rows) / 2.0)))
    # np.pad is the public entry point; the np.lib.pad alias is not available
    # in NumPy 2.x.
    return np.pad(img, (rows_padding, cols_padding, (0, 0)), 'constant')
import os
# Walk the raw dataset folder, preprocess every file and store the result as
# "<original file name>.png" in the output folder.
source_root = r"\Images\Rupee\DataSet"  # renamed: `dir` shadowed the builtin
for dirpath, _dirnames, fnames in os.walk(source_root):
    for f in fnames:
        full_path = os.path.join(dirpath, f)

        processed = pre_process(full_path)
        out_path = fr"Images\Rupee\Processed\{f}.png"
        # cv2.imwrite returns False (no exception) when the file cannot be
        # written, e.g. because the output directory does not exist.
        if not cv2.imwrite(out_path, processed):
            print(f"Could not write {out_path}")

Bước split train/test có stratify:

import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
# Load every preprocessed image, one-hot encode its label (taken from the
# file-name prefix before the first '_') and write the training split to disk.
dir_name = r"D:\Computer vision\Images\Rupee\Processed"
labels = {'One': 0, 'Two': 1, 'Five': 2, 'Ten': 3, 'Twenty': 4}
files = os.listdir(dir_name)
n_files = len(files)
X = np.zeros((n_files, 300, 300, 3))
Y = np.zeros((n_files, 5))
for row, file_name in enumerate(files):
    prefix = file_name[:file_name.index('_')]
    class_id = labels[prefix]
    X[row] = cv2.imread(os.path.join(dir_name, file_name))
    Y[row, class_id] = 1

# Stratified 70/30 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=104,
    test_size=0.3,
    shuffle=True,
    stratify=Y,
)

# NOTE(review): only the training split is written here; X_test / y_test are
# not saved anywhere in this snippet.
for sample_id, (sample, target) in enumerate(zip(X_train, y_train)):
    np.save(fr'\Images\Rupee\Train\X\{sample_id}.npy', sample)
    np.save(fr'\Images\Rupee\Train\Y\{sample_id}.npy', target)

Kiến trúc CNN em dùng trên dataset đã augmented:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
import keras
# Load the saved training samples into memory.
data_size = 554
X_train = np.empty((data_size, 300, 300, 3))
y_train = np.empty((data_size, 5))
for index in range(data_size):
    X_train[index] = np.load(fr"/content/Train/X/{index}.npy")
    y_train[index] = np.load(fr"/content/Train/Y/{index}.npy")

# NOTE(review): the generator is built with default arguments, so presumably
# no augmentation or rescaling is applied here -- confirm.
datagen = keras.preprocessing.image.ImageDataGenerator()
# Renamed from `iter`, which shadowed the builtin of the same name.
train_batches = datagen.flow(X_train, y_train)

model = Sequential()
# convolutional layers
model.add(Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 activation='relu', input_shape=(300, 300, 3)))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                 activation='relu'))
# Only one pooling step precedes Flatten, so the dense layer below receives
# a 150 * 150 * 32 = 720000-element vector.
model.add(Flatten())
# hidden layer
model.add(Dense(64, activation='relu'))
# output layer: one unit per class
model.add(Dense(5, activation='softmax'))
# compiling the sequential model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
model.fit(train_batches, epochs=15)
model.save(r'model.keras')

Train 99.46%, trong khi trên test chỉ có 33.33%. Không biết em có làm sai ở công đoạn nào không hay dữ liệu em lụm được là rác :<. Mong mọi người giúp ạ. Một vài ảnh đã qua tiền xử lý:

anonymous280 · March 20, 2024, 1:08pm

Mô hình cần tối thiểu 4 conv layer mới giải quyết được dataset này. Em sửa thành:

# Four conv + max-pool stages with decreasing filter counts (128, 64, 32, 16),
# followed by a 250-unit dense layer and a 5-way softmax output.
conv_options = dict(kernel_size=(3, 3), strides=(1, 1), padding='same',
                    activation='relu')
model = Sequential([
    # convolutional layers
    Conv2D(128, input_shape=(300, 300, 3), **conv_options),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(64, **conv_options),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(32, **conv_options),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(16, **conv_options),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    # hidden layer
    Dense(250, activation='relu'),
    # output layer
    Dense(5, activation='softmax'),
])

Kiến trúc này cho 93.28% trên test.