Evaluating Hyperparameter Sweeps with W&B
# WandB – Install the W&B library
%pip install wandb -q
# Imports used throughout this notebook
import glob, os
import cv2, imageio
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import utils as np_utils
from sklearn.metrics import confusion_matrix
import wandb
# from wandb.keras import WandbCallback
characters = glob.glob('simpsons-dataset/kaggle_simpson_testset/kaggle_simpson_testset/**')
# characters = glob.glob('simpsons-dataset/simpsons_dataset/simpsons_dataset/*/**')
plt.figure(figsize=(16,11))
plt.subplots_adjust(wspace=0, hspace=0.1)
i = 0
for character in characters[:15]:
    img = cv2.imread(character)
    img = cv2.resize(img, (250, 250))
    plt.subplot(3, 5, i + 1)
    plt.title(character.split('testset/')[-1])
    # plt.title(character.split('/')[-2])
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    i += 1
# Define the labels for the Simpsons characters we're detecting
character_names = {0: 'abraham_grampa_simpson', 1: 'apu_nahasapeemapetilon', 2: 'bart_simpson',
                   3: 'charles_montgomery_burns', 4: 'chief_wiggum', 5: 'comic_book_guy', 6: 'edna_krabappel',
                   7: 'homer_simpson', 8: 'kent_brockman', 9: 'krusty_the_clown', 10: 'lenny_leonard', 11: 'lisa_simpson',
                   12: 'marge_simpson', 13: 'mayor_quimby', 14: 'milhouse_van_houten', 15: 'moe_szyslak',
                   16: 'ned_flanders', 17: 'nelson_muntz', 18: 'principal_skinner', 19: 'sideshow_bob'}
img_size = 64
num_classes = 20
dir = "simpsons-dataset/simpsons_dataset/simpsons_dataset"
# Load training data
X_train = []
y_train = []
for label, name in character_names.items():
list_images = os.listdir(dir+'/'+name)
for image_name in list_images:
image = imageio.imread(dir+'/'+name+'/'+image_name)
X_train.append(cv2.resize(image, (img_size,img_size)))
y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
# Hold out the last 100 images for validation (note: the data was never
# shuffled, so this slice comes entirely from the last class loaded)
X_test = X_train[-100:]
y_test = y_train[-100:]
X_train = X_train[:-100]
y_train = y_train[:-100]
# Normalize the data
X_train = X_train / 255.0
X_test = X_test / 255.0
# One hot encode the labels (neural nets only like numbers)
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
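As a quick sanity check of the encoding (purely illustrative), label 2 maps to bart_simpson and becomes a 20-dimensional one-hot vector:
# Illustrative check: to_categorical turns integer labels into one-hot rows
example = np_utils.to_categorical([2], num_classes)
print(example.shape)       # (1, 20)
print(example[0][:5])      # [0. 0. 1. 0. 0.]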
len(X_train), len(y_train), len(X_test), len(y_test)
plt.figure(figsize=(16,10))
plt.subplots_adjust(wspace=0, hspace=0.1)
p = 1
for i in range(0, len(X_train), len(X_train) // 14):
    img = X_train[i]
    label = character_names[y_train[i].argmax(0)]
    img = cv2.resize(img, (250, 250))
    plt.subplot(3, 5, p)
    p += 1
    plt.title(label)
    plt.imshow(img)
    plt.axis('off')
Run A Sweep
I ran a hyperparameter sweep in the Weights & Biases tool with 32 runs; you can view the report here: wandb.ai/chrismilleruk/reports/Exploring-W-B-Sweeps
There you can see the Bayesian algorithm gradually improve its prediction of which combination of hyperparameters to attempt.
All the hyperparameters are laid out in an (interactive) visualisation.
A filtered view of the hyperparameters that yielded >80% accuracy (with >90% highlighted) allows some conclusions to be drawn if further sweeps are required.
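For reference, a Bayesian sweep like this is driven by a config dictionary (or YAML file) passed to wandb.sweep. Below is a minimal sketch; the hyperparameter names and ranges are illustrative assumptions, not the exact ones used in the report above.
# A minimal, illustrative Bayesian sweep config (not the exact one from the report)
sweep_config = {
    'method': 'bayes',  # Bayesian optimisation over the search space
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        'learning_rate': {'min': 1e-4, 'max': 1e-2},
        'batch_size': {'values': [32, 64, 128]},
        'dropout': {'min': 0.1, 'max': 0.5},
    },
}
# sweep_id = wandb.sweep(sweep_config, project='simpsons')
# wandb.agent(sweep_id, function=train)  # `train` would be your training function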
entity = 'sweep'
project = 'simpsons'
sweep_id = "uqg7jmld"
# import wandb
api = wandb.Api()
sweep = api.sweep(f"{entity}/{project}/{sweep_id}")
runs = sorted(sweep.runs, key=lambda run: run.summary.get("val_accuracy", 0), reverse=True)
val_acc = runs[0].summary.get("val_accuracy", 0)
print(f"Best run {runs[0].name} with {val_acc * 100:.2f}% validation accuracy")
runs[0].file("model-best.h5").download(replace=True)
print("Best model saved to model-best.h5")
# Recreate the exact same model, including its weights and the optimizer
model = tf.keras.models.load_model('model-best.h5')
# Show the model architecture
model.summary()
def get_prediction(x, y):
    # Resize the image and normalise it to [0, 1]
    pic = cv2.resize(x, (64, 64)).astype('float32')
    if pic.max() > 1.: pic = pic / 255.
    # Get class probabilities for the character
    prediction = model.predict(pic.reshape(1, 64, 64, 3))[0]
    # Get the true name of the character
    character = character_names[y]
    name = character.split('_')[0].title()
    # Format the top-3 predictions as strings to overlay on the image
    text = sorted(['{:s} : {:.1f}%'.format(character_names[k].split('_')[0].title(), 100 * v)
                   for k, v in enumerate(prediction)],
                  key=lambda s: float(s.split(':')[1].split('%')[0]), reverse=True)[:3]
    # Upscale the original image (expecting a 0-255 range here)
    img = cv2.resize(x, (352, 352))
    if np.issubdtype(img.dtype, np.floating): img = (img * 255).astype('uint8')
    # Create a white background to overlay the text on
    cv2.rectangle(img, (0, 260), (215, 352), (255, 255, 255), -1)
    # Add the text to the image
    font = cv2.FONT_HERSHEY_DUPLEX
    cv2.putText(img, 'Name : %s' % name, (10, 280), font, 0.7, (73, 79, 183), 2, cv2.LINE_AA)
    for k, t in enumerate(text):
        color = (10, 100, 10) if name in t else (80, 0, 0)
        cv2.putText(img, t, (10, 300 + k * 18), font, 0.65, color, 2, cv2.LINE_AA)
    title = "%s: %s" % (name, text[0])
    return img, title
plt.figure(figsize=(18,8))
plt.subplots_adjust(wspace=0, hspace=0.1)
p = 1
for i in range(0, len(X_test), len(X_test) // 9):
    plt.subplot(2, 5, p)
    p += 1
    x = X_test[i]
    y = y_test[i].argmax()
    (img, label) = get_prediction(x, y)
    plt.imshow(img)
    plt.title(label)
    plt.axis('off')
The predictions look great: confident and correct.
But there is a problem. Because the data was never shuffled before the split, the validation set contains only pictures of Sideshow Bob, the last class loaded.
That needs fixing, but first: how will the model perform against a dataset it has never seen before?
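As an aside, a fairer split would shuffle and stratify, so every character appears in the validation set. A minimal sketch with scikit-learn, assuming it is run on the integer labels (i.e. before the one-hot encoding step above):
# Illustrative fix: a shuffled, stratified hold-out split
# (run on integer labels, before to_categorical)
from sklearn.model_selection import train_test_split
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train,
    test_size=100,       # same hold-out size as before
    shuffle=True,
    stratify=y_train,    # keep all 20 characters represented
    random_state=42)
With that noted, on to the unseen data: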
def predict_test():
    predicted_images = []
    for i in range(20):
        character = character_names[i]
        # Pick a random image of this character from the test dataset
        image = cv2.imread(np.random.choice(
            [k for k in glob.glob('simpsons-dataset/kaggle_simpson_testset/kaggle_simpson_testset/*.*') if character in k]))
        # print(image.shape)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        (img, title) = get_prediction(image, i)
        predicted_images.append((img, title))
    return predicted_images
predicted = predict_test()
plt.figure(figsize=(18,18))
plt.subplots_adjust(wspace=0, hspace=0)
for p, (img, label) in enumerate(predicted, start=1):
    img = cv2.resize(img, (250, 250))
    plt.subplot(4, 5, p)
    plt.title(label)
    plt.imshow(img)
    plt.axis('off')
Not bad considering the flaw in our methodology. I've run this a few times now and we occasionally drop one or two from each batch of 20. It's probably around 90% accurate, at a guess.
Let's check the whole Kaggle test set.
testset = glob.glob('simpsons-dataset/kaggle_simpson_testset/kaggle_simpson_testset/*.jpg')
img_size = 64
x_testset = []
y_testset = []
for path in testset:
    image = imageio.imread(path)
    image = cv2.resize(image, (img_size, img_size))
    # Skip images that aren't 3-channel RGB after resizing
    if image.shape != (img_size, img_size, 3): continue
    filename = path.split('testset/')[-1]
    names = [k for k, v in character_names.items() if v in filename]
    if not names: continue
    x_testset.append(image)
    y_testset.append(names[0])
x_testset = np.array(x_testset)
y_testset = np.array(y_testset)
# Normalise image data
x_testset = x_testset / 255
print('Making model predictions for %d images' % len(x_testset))
%time prediction = model.predict(x_testset)
predictions = prediction.argmax(1)
print('\nCompare first 24 predictions:')
print(predictions[:24])
print(y_testset[:24])
arr = predictions == y_testset
print('\nAccuracy', np.sum(arr), '/', len(arr))
print(np.sum(arr) / len(arr) * 100, '%')
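plot_conf_matrix isn't defined in this section; if you're running it standalone, a minimal stand-in built on matplotlib (an approximation, not the original helper) could be:
# Minimal stand-in for the plot_conf_matrix helper used below
def plot_conf_matrix(cm, labels, title):
    fig, ax = plt.subplots(figsize=(10, 10))
    im = ax.imshow(cm, cmap='Blues')  # darker cells = more predictions
    ax.set_xticks(range(len(labels)))
    ax.set_yticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=90)
    ax.set_yticklabels(labels)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title(title)
    fig.colorbar(im, ax=ax)
    plt.show()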
confusion = confusion_matrix(y_testset, predictions)
labels = list(character_names.values())
plot_conf_matrix(confusion, labels, "Confusion Matrix for best model in W&B sweep")