Evaluating CNN
Now, let's evaluate our model on the test data. We've already saved a trained model under /creatica/code/model, so we can load it here. Let's start!
Importing libraries and defining paths and constants¶
import os
import argparse
import simplejson
import matplotlib.pyplot as plt
import cv2
import numpy as np
import glob
import tensorflow as tf
import keras
from keras.models import model_from_json
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications.inception_v3 import InceptionV3, preprocess_input
# Clear Keras' global graph/session state so stale models don't interfere.
keras.backend.clear_session()
#define paths and constants
data_path = "/Users/victorialiu/git/creatica/code/data/"  # root of the image dataset
test_img_path = os.path.join(data_path, 'test')  # directory holding the test images
batch_size = 16  # batch size used for model.predict
TARGET_SIZE = 299  # InceptionV3's expected input resolution (299x299)
Command Line Argument Parser¶
We want to be able to run our code from the command line (at least in the .py
version of this notebook), so we use an argument parser to translate command line arguments. We require a model name when running the script, the same model that was saved earlier.
## Parse command line arguments
def parse_args():
    """Parse the script's command line arguments.

    Returns the parsed namespace; -m/--model-name is required and gives
    the prefix of the previously saved model to evaluate.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-m', '--model-name',
        required=True,
        help=('prefix for saved trained model we want to evaluate ' +
              '(e.g. dense_arch1, conv_regularize05, etc.)'),
    )
    return cli.parse_args()
Image Pre-processing with InceptionV3 net¶
Here, we pre-process our images by extracting their features using InceptionV3.
def get_data():
    """Load pre-extracted InceptionV3 features and build one-hot labels.

    The feature arrays store the classes consecutively and in equal
    counts, ordered 0 = bananas, 1 = carrots, 2 = hotdog (matching the
    one_hot_categories dicts used elsewhere in this script).

    Returns:
        (train_data, train_labels, test_data, test_labels), where the
        label arrays are one-hot encoded over the three classes.
    """
    # Load training features; requires the same number of samples for
    # each of the three classes.
    train_data = np.load('inception_features_train.npz')['features']
    train_data_type_count = len(train_data) // 3
    train_labels = np.array(
        [0] * train_data_type_count +
        [1] * train_data_type_count +
        [2] * train_data_type_count
    )
    # Load testing features; same equal-count-per-class layout assumed.
    test_data = np.load('inception_features_test.npz')['features']
    test_data_type_count = len(test_data) // 3
    test_labels = np.array(
        [0] * test_data_type_count +
        [1] * test_data_type_count +
        [2] * test_data_type_count
    )
    # Convert labels to one-hot vectors (probability distributions w/
    # probability 1 assigned to the correct label)
    train_labels = keras.utils.to_categorical(train_labels)
    test_labels = keras.utils.to_categorical(test_labels)
    return (train_data, train_labels, test_data, test_labels)
train_data, train_labels, test_data, test_labels = get_data()
Get all incorrectly classified images¶
Lots of indices and logic-ing here, but we're just extracting the file paths of all the incorrectly classified images, so that we can take a look at them. The get_image()
function also saves the plot under /creatica/code/img
, and when run on the command line, the incorrectly classified pictures will pop up one by one.
def incorrect_labels_indices(rounded_predictions, test_labels):
    """Collect information about every misclassified test sample.

    Args:
        rounded_predictions: 1-D array of predicted class indices.
        test_labels: 2-D array of one-hot ground-truth labels.

    Returns:
        A tuple of three parallel lists:
        (indices of the misclassified samples,
         true class index of each misclassified sample,
         predicted class index of each misclassified sample)
    """
    mismatch_indices = []
    predicted_for_mismatch = []
    true_for_mismatch = []
    for idx, prediction in enumerate(rounded_predictions):
        true_label = np.argmax(test_labels[idx], axis=-1)
        if prediction == true_label:
            continue
        mismatch_indices.append(idx)
        true_for_mismatch.append(true_label)
        predicted_for_mismatch.append(prediction)
    return (mismatch_indices, true_for_mismatch, predicted_for_mismatch)
def get_incorrect_classification_fpaths(incorrect_tuple, rounded_predictions):
    """
    Return the relative file paths (under the test image directory) of
    every misclassified test image, so they can be plotted later.

    incorrect_tuple of form:
        (incorrect_classification_indices,
         correct_labels_of_misclassified,
         incorrect_labels_of_misclassified)
    as returned by incorrect_labels_indices(); the third element is
    ignored here.

    NOTE(review): rounded_predictions is accepted but never used.
    NOTE(review): assumes every test class directory holds the same
    number of images as test/hotdog, and that os.listdir ordering
    matches the order the features were extracted in — confirm against
    the feature-extraction script.
    """
    #how many pictures per category (should be 50 for every class)
    index_offset = len(glob.glob(os.path.join(data_path, 'test/hotdog/*.jpg')))
    #keep track of file paths of incorrect pictures for plotting
    incorrect_classification_file_paths = []
    #dictionary of one hot vectors to categories
    one_hot_categories = {
        0 : 'bananas',
        1 : 'carrots',
        2 : 'hotdog'
    }
    # print(f'len incorrect tuple: {len(incorrect_tuple)}')
    incorrect_classification_indices, correct_labels_of_misclassified, _ = incorrect_tuple
    #get incorrect file paths and true values
    for j in range(len(correct_labels_of_misclassified)):
        # position of this sample within its own class directory
        i = incorrect_classification_indices[j] % index_offset
        incorrect_classification_file_paths.append(
            os.path.join(
                f'{one_hot_categories[correct_labels_of_misclassified[j]]}', os.listdir(
                    os.path.join(test_img_path, f'{one_hot_categories[correct_labels_of_misclassified[j]]}')
                )[i]
            ))
    return incorrect_classification_file_paths
def get_image(file_path, pred_value, true_value, cwd, model_name, i):
    """Display and save one misclassified test image.

    Resizes the image to TARGET_SIZE x TARGET_SIZE, titles the plot
    with the predicted and true class names, saves it under <cwd>/img
    as <model_name>_mistake_<i>.png, and shows it on screen. macOS
    .DS_Store entries and non-file paths are skipped silently.
    """
    category_names = {
        0 : 'bananas',
        1 : 'carrots',
        2 : 'hotdog'
    }
    pred_value = category_names[pred_value]
    true_value = category_names[true_value]
    # Guard clauses: ignore Finder metadata and missing files.
    if '.DS_Store' in file_path:
        return
    full_path = os.path.join(test_img_path, file_path)
    if not os.path.isfile(full_path):
        return
    # OpenCV loads BGR; convert to RGB so matplotlib colors are correct.
    bgr_image = cv2.imread(full_path, cv2.IMREAD_COLOR)
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    resized_image = cv2.resize(
        rgb_image, (TARGET_SIZE, TARGET_SIZE), interpolation=cv2.INTER_CUBIC
    )
    plt.imshow(resized_image)
    plt.axis("off")
    plt.title('Pred: %s\nTrue: %s' % (pred_value, true_value))
    plt.savefig(os.path.join(cwd, 'img', '%s_mistake_%s.png') % (model_name, str(i)))
    plt.show()
Let's Go!!¶
Here, we write the main function that the command line executes. We'll print out the training loss, training accuracy, validation loss, and validation accuracy, using cross-entropy for the loss and accuracy as the metric.
def main():
    """Evaluate a saved model and plot a sample of its mistakes.

    Loads the model architecture (<model_name>.json) and weights
    (<model_name>.h5) from <cwd>/model, prints loss/accuracy on both
    the training and test feature sets, then saves and shows every
    10th misclassified test image under <cwd>/img.

    Returns the array of predicted class indices for the test set.
    """
    # comment out if using command line
    model_name = 'multiclass_debugging'
    # # comment out if running from ipynb
    # args = parse_args()
    # model_name = args.model_name
    # Remove src from cwd if necessary
    cwd = os.getcwd()
    if os.path.basename(cwd) == 'src': cwd = os.path.dirname(cwd)
    # Create img directory to save images if needed
    os.makedirs(os.path.join(cwd, 'img'), exist_ok=True)
    # Create model directory to save models if needed
    os.makedirs(os.path.join(cwd, 'model'), exist_ok=True)
    model_weights_fname = os.path.join(cwd, 'model', model_name + '.h5')
    model_json_fname = os.path.join(cwd, 'model', model_name + '.json')
    # Load hotdog model and its weights
    with open(model_json_fname, 'r') as f: model_json = f.read()
    model = model_from_json(model_json)
    model.load_weights(model_weights_fname)
    # Get hotdog data
    (train_data, train_labels, test_data, test_labels) = get_data()
    # Compile model and evaluate its performance on training and test data.
    # BUG FIX: the labels are one-hot vectors over THREE classes, so the
    # loss must be categorical cross-entropy. binary_crossentropy on
    # multi-class one-hot labels computes the wrong loss and makes Keras
    # report a misleadingly high accuracy.
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    score = model.evaluate(train_data, train_labels, verbose=0)
    print()
    print('Training loss:', score[0])
    print('Training accuracy:', score[1])
    score = model.evaluate(test_data, test_labels, verbose=0)
    print()
    print('Validation loss:', score[0])
    print('Validation accuracy:', score[1])
    # Use previously written helper functions to find incorrect
    # classification indices and the file paths to the misclassified images.
    rounded_predictions = np.argmax(
        model.predict(test_data, batch_size=batch_size, verbose=1), axis=-1
    )
    #get incorrectly classified indices and their labels
    incorrect_tuple = incorrect_labels_indices(
        rounded_predictions, test_labels
    )
    #get file paths for incorrectly categorized pics to plot them later
    incorrect_classification_file_paths = get_incorrect_classification_fpaths(
        incorrect_tuple,
        rounded_predictions
    )
    #get correct_labels
    _, correct_labels_of_misclassified, incorrect_labels_of_misclassified = incorrect_tuple
    for i in range(len(correct_labels_of_misclassified)):
        # let's print every 10th image for fun
        # BUG FIX: `i // 10 == 0` only matched the first ten mistakes;
        # modulo selects every 10th misclassified image as intended.
        if i % 10 == 0:
            get_image(
                incorrect_classification_file_paths[i],
                incorrect_labels_of_misclassified[i],
                correct_labels_of_misclassified[i],
                cwd, model_name, i
            )
    return rounded_predictions
# if __name__ == '__main__': main()
main()
Training loss: 0.12234710156917572 Training accuracy: 0.9425837397575378 Validation loss: 0.15481112897396088 Validation accuracy: 0.9200000166893005 10/10 [==============================] - 0s 10ms/step Some incorrectly classified images
array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
Interesting. I can definitely see why some of the images were falsely classified as they were... hopefully the computer's vision will get better in the future!
Authors: Victoria Liu and Gloria Liu
Last modified: November 2020
Description: A script to evaluate a saved neural net that should recognize hot dogs vs. carrots vs. bananas.
Credits: Parts of the code are originally part of a Caltech extra credit assignment (CS 156a), where Aadyot Bhatnagar wrote the parse_args() function. The image augmentation code is heavily modified from J-Yash's open-source code.