Comments and post training visualization
Henry Powell committed Oct 17, 2019
1 parent 0eac17b commit c1b2a69
Showing 1 changed file with 125 additions and 34 deletions.
159 changes: 125 additions & 34 deletions main.py
@@ -1,3 +1,14 @@

"""
Author: Henry Powell
Institution: Institute of Neuroscience, Glasgow University, Scotland.
Implementation of AlexNet using Keras with a TensorFlow backend. The code preloads the oxford_flowers102 dataset.
Training tracks the model's accuracy, loss, and top-5 accuracy. For a true comparison of performance against the
original model (Krizhevsky et al. 2012), this implementation would need to be trained on ImageNet (ILSVRC-2010).
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -9,50 +20,70 @@
import functools
import numpy as np
import matplotlib.pyplot as plt
import sys

np.set_printoptions(threshold=sys.maxsize)


epochs = 100
# Set global variables.
epochs = 1
verbose = 1
batch_size = 100
steps_per_epoch = 10
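# Number of end-to-end training runs performed by run_experiment().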
n = 1

# Set the dataset, which will be downloaded and cached locally by TensorFlow Datasets.
data_set = "oxford_flowers102"

scores = []
# These lists keep track of the training metrics for each iteration of training, in case you want to run an
# experiment, i.e. train the network n times and see how the training differs between iterations.
acc_scores = []
loss_scores = []
top_5_acc_scores = []

# Set up the top-5 accuracy metric (the complement of the top-5 error rate).
top_5_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=5)

top_5_acc.__name__ = 'top_5_acc'
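# Keras uses the function's __name__ attribute as the metric's display name, so logs and history will report 'top_5_acc'.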


def load_data():
"""
Function for loading and preprocessing the training, testing, and validation data.
:return: images and labels as numpy arrays (the labels will be one-hot encoded) as well as an info object
containing information about the loaded dataset.
"""
# Load the data using TensorFlow datasets API.
data_train, info = tfds.load(name=data_set, split='test', with_info=True)
data_test = tfds.load(name=data_set, split='train')
val_data = tfds.load(name=data_set, split='validation')
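# Note: the 'train' and 'test' splits are swapped here; oxford_flowers102's 'test' split (6,149 images) is
# much larger than its 'train' split (1,020 images), so the larger split is used for training.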

# Ensure that loaded data is of the right type.
assert isinstance(data_train, tf.data.Dataset)
assert isinstance(data_test, tf.data.Dataset)

# Prints the dataset information.
print(info)

train_images = []
train_labels = []

for example in data_train.take(6149):
# Here we take all the samples in the training split (take(-1) returns every example; 6,149 in this case),
# convert the data type to float32, and resize. Since the images in Oxford_Flowers are not preprocessed,
# we need to resize them all so that the network takes inputs that are all the same size.
for example in data_train.take(-1):
image, label = example['image'], example['label']
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [256, 256])
train_images.append(image.numpy())
train_labels.append(label.numpy())

# We then convert the lists of images and labels to numpy arrays.
train_images = np.array(train_images)
train_labels = np.array(train_labels)
# And change the labels to one-hot encoded vectors (this is so we can use the categorical_crossentropy loss
# function).
train_labels = utils.to_categorical(train_labels)
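# Illustration: with 102 classes, label 3 becomes a length-102 vector with a 1 at index 3 and 0s elsewhere.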

# We now do as above but with the test and validation datasets.
test_images = []
test_labels = []

for example in data_test.take(1020):
for example in data_test.take(-1):
image, label = example['image'], example['label']
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [256, 256])
@@ -62,10 +93,9 @@ def load_data():
test_labels = np.array(test_labels)
test_labels = utils.to_categorical(test_labels)


val_images = []
val_labels = []
for example in val_data.take(1020):
for example in val_data.take(-1):
image, label = example['image'], example['label']
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [256, 256])
@@ -83,12 +113,31 @@


def visualize(data_train, data_test, info):
"""
Short function that visualizes the dataset, showing 9 samples from each of the training and test datasets
with their respective labels.
:param data_train: A tf.data.Dataset object containing the training data
:param data_test: A tf.data.Dataset object containing the test data
:param info: dataset.info for getting information about the dataset (number of classes, samples etc.)
:return: n/a
"""
tfds.show_examples(info, data_train)
tfds.show_examples(info, data_test)
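# show_examples displays a 3x3 grid of sample images with their class labels as a quick sanity check.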


def run_training(train_data, test_data, val_data):
"""
Build, compile, fit, and evaluate the AlexNet model using Keras.
:param train_data: a tf.data.Dataset object containing (image, label) tuples of training data.
:param test_data: a tf.data.Dataset object containing (image, label) tuples of test data.
:param val_data: a tf.data.Dataset object containing (image, label) tuples of validation data.
:return: trained model object.
"""

# Set up the sequential model
model = keras.Sequential()

# First layer: Convolutional layer with max pooling and batch normalization.
model.add(keras.layers.Conv2D(input_shape=(256, 256, 3),
kernel_size=(11, 11),
strides=(4, 4),
@@ -100,6 +149,7 @@ def run_training(train_data, test_data, val_data):
padding="valid"))
model.add(keras.layers.BatchNormalization())

# Second layer: Convolutional layer with max pooling and batch normalization.
model.add(keras.layers.Conv2D(kernel_size=(11, 11),
strides=(1, 1),
padding="valid",
@@ -110,20 +160,23 @@
padding="valid"))
model.add(keras.layers.BatchNormalization())

# Third layer: Convolutional layer with batch normalization.
model.add(keras.layers.Conv2D(kernel_size=(3, 3),
strides=(1, 1),
padding="valid",
filters=384,
activation=tf.nn.relu))
model.add(keras.layers.BatchNormalization())

# Fourth layer: Convolutional layer with batch normalization.
model.add(keras.layers.Conv2D(kernel_size=(3, 3),
strides=(1, 1),
padding="valid",
filters=384,
activation=tf.nn.relu))
model.add(keras.layers.BatchNormalization())

# Fifth layer: Convolutional layer with max pooling and batch normalization.
model.add(keras.layers.Conv2D(kernel_size=(3, 3),
strides=(1, 1),
padding="valid",
@@ -134,85 +187,123 @@
padding="valid"))
model.add(keras.layers.BatchNormalization())

# Flatten the output to feed it to dense layers
model.add(keras.layers.Flatten())

# Sixth layer: fully connected layer with 4096 neurons, 40% dropout, and batch normalization.
model.add(keras.layers.Dense(units=4096,
activation=tf.nn.relu))
model.add(keras.layers.Dropout(rate=0.4))
model.add(keras.layers.BatchNormalization())

# Seventh layer: fully connected layer with 4096 neurons, 40% dropout, and batch normalization.
model.add(keras.layers.Dense(units=4096,
activation=tf.nn.relu))
model.add(keras.layers.Dropout(rate=0.4))
model.add(keras.layers.BatchNormalization())

# Eighth layer: fully connected layer with 1000 neurons, 40% dropout, and batch normalization.
model.add(keras.layers.Dense(units=1000,
activation=tf.nn.relu))
model.add(keras.layers.Dropout(rate=0.4))
model.add(keras.layers.BatchNormalization())

# Output layer: softmax over the 102 classes of the dataset. Change this integer to match the number of
# classes in your dataset if you switch away from Oxford_Flowers.
model.add(keras.layers.Dense(units=102,
activation=tf.nn.softmax))

model.summary()

# Compile the model using the Adam optimizer and the categorical cross-entropy loss function. If your data is
# not one-hot encoded, change the loss to "sparse_categorical_crossentropy", which accepts integer-valued
# labels rather than one-hot arrays.
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['acc', top_5_acc])
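# Illustrative alternative (not part of this model): with integer labels you would compile with the sparse loss:
# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['acc'])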

history = model.fit(train_data,
epochs=epochs,
validation_data=val_data,
verbose=1,
steps_per_epoch=50)
# Fit the model on the training data and validate on the validation data.
model.fit(train_data,
epochs=epochs,
validation_data=val_data,
verbose=verbose,
steps_per_epoch=steps_per_epoch)
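# Note: with batch_size=100 and steps_per_epoch=10, each epoch draws roughly 1,000 of the 6,149 training images.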

_, accuracy = model.evaluate(test_data,
verbose=1)
print(model.metrics_names)

scores.append(accuracy)
# Evaluate the model
loss, accuracy, top_5 = model.evaluate(test_data,
verbose=verbose,
steps=5)
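# steps=5 evaluates on 5 batches (500 images at batch_size=100) rather than the full test split.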

# Append the metrics to the scores lists in case you are performing an experiment which involves comparing
# training over many iterations.
loss_scores.append(loss)
acc_scores.append(accuracy)
top_5_acc_scores.append(top_5)

return model


def predictions(model, test_images, test_labels):
"""
Display some examples of the predictions that the network makes on the test data.
:param model: trained model object
:param test_images: numpy array containing the test images
:param test_labels: one-hot encoded numpy array containing the test labels
:return: n/a
"""

predictions = model.predict(test_images)
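# predict returns one softmax vector per input image; predictions[0] holds the 102 class probabilities for
# the first test image.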
print(predictions[0])

# Plot the first test image alongside a bar plot of the network's confidence for each possible class.
plt.subplot(1, 2, 1)
plt.imshow(test_images[0])

plt.subplot(1, 2, 2)
plt.bar([i for i in range(len(predictions[0]))], predictions[0])
plt.show()
print(np.argmax(predictions[0]))
print(test_labels[0])


def run_experiment(n):
"""
Run an experiment. One experiment loads the dataset, trains the model, and outputs the evaluation metrics after
training.
:param n: Number of experiments to perform
:return: n/a
"""
for experiments in range(n):
data_train, data_test, \
train_images, train_labels, \
test_images, test_labels, \
val_images, val_labels, \
info = load_data()

data_train, data_test, train_images, train_labels, test_images, test_labels, val_images, val_labels, info \
= load_data()

visualize(data_train, data_test, info)

# Plot the first resized training image as a sanity check.
plt.imshow(train_images[0])
plt.show()

# Make image, label pairs into a tf.data.Dataset, shuffle the data, and specify the batch size.
train_data = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_data = train_data.repeat().shuffle(1024).batch(32)
train_data = train_data.repeat().shuffle(1024).batch(100)

test_data = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_data = test_data.repeat().shuffle(1024).batch(32)
test_data = test_data.repeat().shuffle(1024).batch(100)

val_data = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_data = val_data.batch(64)
val_data = val_data.batch(100)
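# The validation set is not shuffled or repeated: model.fit consumes it once, in order, at the end of each epoch.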

model = run_training(train_data, test_data, val_data)

predictions(model, test_images, test_labels)

print(scores)
print('M={}'.format(np.mean(scores)), 'STD={}'.format(np.std(scores)))
print('Min={}'.format(np.min(scores)), 'Max={}'.format(np.max(scores)))
# Print the mean, std, min, and max of the test accuracy scores from your experiment.
print(acc_scores)
print('Mean_accuracy={}'.format(np.mean(acc_scores)), 'STD_accuracy={}'.format(np.std(acc_scores)))
print('Min_accuracy={}'.format(np.min(acc_scores)), 'Max_accuracy={}'.format(np.max(acc_scores)))


run_experiment(n)

2 comments on commit c1b2a69

@mikechen66

Hi Henry:

When I tried to run main.py, I got the following errors. Please indicate how to solve these issues.

Before running the script, there is another warning: "InsecureRequestWarning: Unverified HTTPS request is being made to host 'www.robots.ox.ac.uk'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings InsecureRequestWarning". After I added the following two lines of code, the InsecureRequestWarning disappeared.

import urllib3
urllib3.disable_warnings()

$ python main.py

Using TensorFlow backend.
Downloading and preparing dataset oxford_flowers102/2.1.0 (download: 336.76 MiB, generated: 331.34 MiB, total: 668.11 MiB) to /home/mike/tensorflow_datasets/oxford_flowers102/2.1.0...
Extraction completed...: 0 file [06:24, ? file/s]███| 1/1 [06:24<00:00, 384.36s/ url]
Dl Size...: 100%|████████████████████████████████| 328/328 [06:24<00:00, 1.17s/ MiB]
Dl Completed...: 100%|██████████████████████████████| 1/1 [06:24<00:00, 384.37s/ url]
Traceback (most recent call last):
File "main.py", line 431, in
run_experiment(n, large_data_set=False, generator=True)
File "main.py", line 397, in run_experiment
data_train, data_test, data_val, info = load_data()
File "main.py", line 73, in load_data
data_train, info = tfds.load(name=data_set, split='test', with_info=True)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/api_utils.py", line 53, in disallow_positional_args_dec
return fn(*args, **kwargs)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/registered.py", line 339, in load
dbuilder.download_and_prepare(**download_and_prepare_kwargs)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/api_utils.py", line 53, in disallow_positional_args_dec
return fn(*args, **kwargs)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/dataset_builder.py", line 364, in download_and_prepare
download_config=download_config)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/dataset_builder.py", line 1072, in _download_and_prepare
max_examples_per_split=download_config.max_examples_per_split,
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/dataset_builder.py", line 933, in _download_and_prepare
dl_manager, **split_generators_kwargs):
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/image_classification/oxford_flowers102.py", line 109, in _split_generators
"setid": os.path.join(_BASE_URL, "setid.mat"),
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/download/download_manager.py", line 419, in download_and_extract
return _map_promise(self._download_extract, url_or_urls)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/download/download_manager.py", line 462, in _map_promise
res = utils.map_nested(_wait_on_promise, all_promises)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/utils/py_utils.py", line 147, in map_nested
for k, v in data_struct.items()
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/utils/py_utils.py", line 147, in
for k, v in data_struct.items()
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/utils/py_utils.py", line 161, in map_nested
return function(data_struct)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/download/download_manager.py", line 446, in _wait_on_promise
return p.get()
File "/home/mike/miniconda3/lib/python3.7/site-packages/promise/promise.py", line 512, in get
return self._target_settled_value(_raise=True)
File "/home/mike/miniconda3/lib/python3.7/site-packages/promise/promise.py", line 516, in _target_settled_value
return self._target()._settled_value(_raise)
File "/home/mike/miniconda3/lib/python3.7/site-packages/promise/promise.py", line 226, in _settled_value
reraise(type(raise_val), raise_val, self._traceback)
File "/home/mike/miniconda3/lib/python3.7/site-packages/six.py", line 703, in reraise
raise value
File "/home/mike/miniconda3/lib/python3.7/site-packages/promise/promise.py", line 87, in try_catch
return (handler(*args, **kwargs), None)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/download/download_manager.py", line 306, in callback
resource, download_dir_path, checksum, dl_size)
File "/home/mike/miniconda3/lib/python3.7/site-packages/tensorflow_datasets/core/download/download_manager.py", line 261, in _handle_download_result
raise NonMatchingChecksumError(resource.url, tmp_path)
tensorflow_datasets.core.download.download_manager.NonMatchingChecksumError: Artifact https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz, downloaded to /home/mike/tensorflow_datasets/downloads/robots.ox.ac.uk_vgg_flowers_102_102flowersoWedSp98maBn1wypsDib6T-q2NVbO40fwvTflmPmQpY.tgz.tmp.daafa0193a184f4b896baa1bf35573a9/102flowers.tgz, has wrong checksum. This might indicate:
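
One workaround I may try next (assuming the failure comes from a stale or corrupted cached download rather than a changed upstream file) is forcing a clean re-download with tfds's DownloadConfig:

import tensorflow_datasets as tfds

# Force tfds to discard the cached artifact and download the archive again.
dl_config = tfds.download.DownloadConfig(download_mode=tfds.download.GenerateMode.FORCE_REDOWNLOAD)
data_train, info = tfds.load(name='oxford_flowers102', split='test', with_info=True,
download_and_prepare_kwargs={'download_config': dl_config})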

@henrypowell87
Owner
@henrypowell87 commented on c1b2a69 Apr 28, 2020 via email
