# da_resnet18_mnist.py


from __future__ import print_function

from collections import defaultdict
import cPickle as pickle
from PIL import Image

from six.moves import range
import sys
sys.setrecursionlimit(2**25)

import keras.backend as K
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
from keras.layers.core import Activation
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Convolution2D, MaxPooling2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils.generic_utils import Progbar
import numpy as np
import resnet


# from pyimagesearch.cnn.networks import LeNet
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adamax, Nadam

from keras.utils import np_utils

# Seed NumPy's global random generator with a fixed value; the training
# script below draws its noise vectors and sampled labels from np.random.
np.random.seed(31337)

# Select the 'th' image dimension ordering. Every image shape declared in
# this file is written channels-first, i.e. (1, 28, 28).
K.set_image_dim_ordering('th')


def build_generator(latent_size, nb_classes=10):
    """Build the class-conditional image generator.

    The generator maps a pair (z, L), where z is a latent vector and L is an
    integer class label, to image space (..., 1, 28, 28).

    # Arguments
        latent_size: length of the latent vector z. The label embedding has
            the same length because the two are multiplied element-wise.
        nb_classes: number of distinct labels accepted by the label
            embedding. Defaults to 10, the number of MNIST digit classes.

    # Returns
        A `Model` whose inputs are `[latent, image_class]` and whose output
        is the generated image produced by a final tanh convolution.
    """
    cnn = Sequential()

    cnn.add(Dense(1024, input_dim=latent_size, activation='relu'))
    cnn.add(Dense(128 * 7 * 7, activation='relu'))
    cnn.add(Reshape((128, 7, 7)))

    # upsample to (..., 14, 14)
    cnn.add(UpSampling2D(size=(2, 2)))
    cnn.add(Convolution2D(256, 5, 5, border_mode='same',
                          activation='relu', init='glorot_normal'))

    # upsample to (..., 28, 28)
    cnn.add(UpSampling2D(size=(2, 2)))
    cnn.add(Convolution2D(128, 5, 5, border_mode='same',
                          activation='relu', init='glorot_normal'))

    # take a channel axis reduction
    cnn.add(Convolution2D(1, 2, 2, border_mode='same',
                          activation='tanh', init='glorot_normal'))

    # this is the z space commonly referred to in GAN papers
    latent = Input(shape=(latent_size, ))

    # this will be our label: a single integer per sample
    image_class = Input(shape=(1,), dtype='int32')

    # one learned vector of length latent_size per class; Flatten drops the
    # length-one sequence axis introduced by the embedding lookup
    cls = Flatten()(Embedding(nb_classes, latent_size,
                              init='glorot_normal')(image_class))

    # hadamard product between z-space and a class conditional embedding
    h = merge([latent, cls], mode='mul')

    fake_image = cnn(h)

    return Model(input=[latent, image_class], output=fake_image)


def build_discriminator():
    """Assemble the discriminator that separates real from generated images.

    A stack of four 3x3 convolution stages (each followed by a LeakyReLU and
    30% dropout) turns a (1, 28, 28) image into a flat feature vector, which
    a single sigmoid unit named 'generation' maps to one score per image.
    The training loop in this file uses target 1 for real images and 0 for
    generated ones.

    # Returns
        A `Model` mapping an image to the sigmoid 'generation' output.
    """
    # (number of filters, stride) of each convolution stage, in order
    conv_stages = [(32, (2, 2)), (64, (1, 1)), (128, (2, 2)), (256, (1, 1))]

    feature_extractor = Sequential()
    for position, (nb_filter, stride) in enumerate(conv_stages):
        # only the very first layer of the stack declares the input shape
        first_layer_kwargs = {}
        if position == 0:
            first_layer_kwargs['input_shape'] = (1, 28, 28)

        feature_extractor.add(Convolution2D(nb_filter, 3, 3,
                                            border_mode='same',
                                            subsample=stride,
                                            **first_layer_kwargs))
        feature_extractor.add(LeakyReLU())
        feature_extractor.add(Dropout(0.3))

    feature_extractor.add(Flatten())

    image = Input(shape=(1, 28, 28))
    features = feature_extractor(image)

    # single sigmoid unit scoring the image (output name: 'generation')
    score = Dense(1, activation='sigmoid', name='generation')(features)

    return Model(input=image, output=score)


def build_resnet():  # source: https://github.com/raghakot/keras-resnet
    """Create the ResNet-18 classifier for (1, 28, 28) images.

    The network comes from `resnet.ResnetBuilder`, built for input shape
    (1, 28, 28) and 10 outputs, and is wrapped in a fresh `Model` with its
    own `Input` layer.

    # Returns
        A `Model` mapping an image to the ResNet-18 output.
    """
    # The same builder also exposes build_resnet_34 / _50 / _101 / _152,
    # which take the same ((1, 28, 28), 10) arguments.
    backbone = resnet.ResnetBuilder.build_resnet_18((1, 28, 28), 10)

    image = Input(shape=(1, 28, 28))
    prediction = backbone(image)

    return Model(input=image, output=prediction)

if __name__ == '__main__':
    # Train a class-conditional generator against a real/fake discriminator
    # and a ResNet classifier on MNIST, evaluating and saving weights after
    # every epoch and reporting the classifier's test accuracy at the end.

    # batch and latent size taken from the paper
    nb_epochs = 100
    batch_size = 100
    latent_size = 100
    nb_classes = 10
    # Adam parameters suggested in https://arxiv.org/abs/1511.06434
    adam_lr = 0.0002
    adam_beta_1 = 0.5

    # build the discriminator
    discriminator = build_discriminator()
    discriminator.compile(
        optimizer=SGD(lr=0.01),
        loss='binary_crossentropy')

    # build the classifier. It is deliberately bound to `classifier` and not
    # to `resnet`, so the imported `resnet` module is not overwritten.
    classifier = build_resnet()
    classifier.compile(loss="categorical_crossentropy", optimizer='adadelta',
                       metrics=["accuracy"])

    # build the generator
    generator = build_generator(latent_size)
    generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
                      loss='binary_crossentropy')

    latent = Input(shape=(latent_size, ))
    image_class = Input(shape=(1,), dtype='int32')

    # get a fake image
    fake_img = generator([latent, image_class])

    # we only want to be able to train generation for the combined model
    discriminator.trainable = False
    classifier.trainable = False

    fake = discriminator(fake_img)
    aux = classifier(fake_img)

    # generator followed by both judges: one output for real/fake, one for
    # the class prediction
    combined = Model(input=[latent, image_class], output=[fake, aux])

    combined.compile(
        optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
        loss=['binary_crossentropy', 'categorical_crossentropy']
    )

    # get our mnist data, and force it to be of shape (..., 1, 28, 28) with
    # range [-1, 1]
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=1)

    X_test = (X_test.astype(np.float32) - 127.5) / 127.5
    X_test = np.expand_dims(X_test, axis=1)

    # one-hot test labels, used for the final accuracy evaluation
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    nb_test = X_test.shape[0]

    # number of full batches per epoch; a trailing partial batch is dropped
    nb_batches = X_train.shape[0] // batch_size

    train_history = defaultdict(list)
    test_history = defaultdict(list)

    for epoch in range(nb_epochs):
        print('Epoch {} of {}'.format(epoch + 1, nb_epochs))

        progress_bar = Progbar(target=nb_batches)

        epoch_gen_loss = []
        epoch_disc_loss = []
        epoch_resnet_loss = []

        for index in range(nb_batches):
            # updated at the start of each batch, so the bar shows the number
            # of batches completed so far
            progress_bar.update(index)

            # generate a new batch of noise
            noise = np.random.normal(loc=0.0, scale=1,
                                     size=(batch_size, latent_size))

            # get a batch of real images
            batch_start = index * batch_size
            batch_stop = (index + 1) * batch_size
            image_batch = X_train[batch_start:batch_stop]
            label_batch = y_train[batch_start:batch_stop]

            # sample some labels from p_c
            sampled_labels = np.random.randint(0, nb_classes, batch_size)

            # generate a batch of fake images, using the generated labels as a
            # conditioner. We reshape the sampled labels to be
            # (batch_size, 1) so that we can feed them into the embedding
            # layer as a length one sequence
            generated_images = generator.predict(
                [noise, sampled_labels.reshape((-1, 1))], verbose=0)

            # real images first (target 1), generated images second (target 0)
            X = np.concatenate((image_batch, generated_images))
            y = np.array([1] * batch_size + [0] * batch_size)
            aux_y = np.concatenate((label_batch, sampled_labels), axis=0)
            aux_y = np_utils.to_categorical(aux_y, nb_classes)

            # see if the discriminator can figure itself out...
            epoch_disc_loss.append(discriminator.train_on_batch(X, y))

            # the classifier is trained on the same mixed real/generated batch
            epoch_resnet_loss.append(classifier.train_on_batch(X, aux_y))

            # make new noise. we generate 2 * batch size here such that we have
            # the generator optimize over an identical number of images as the
            # discriminator
            noise = np.random.normal(loc=0.0, scale=1,
                                     size=(2 * batch_size, latent_size))
            sampled_labels = np.random.randint(
                0, nb_classes, 2 * batch_size).reshape(-1, 1)
            aux_sampled_labels = np_utils.to_categorical(sampled_labels,
                                                         nb_classes)

            # we want to train the generator to trick the discriminator
            # For the generator, we want all the {fake, not-fake} labels to say
            # not-fake
            trick = np.ones(2 * batch_size)

            epoch_gen_loss.append(combined.train_on_batch(
                [noise, sampled_labels], [trick, aux_sampled_labels]))

        print('\nTesting for epoch {}:'.format(epoch + 1))

        # evaluate the testing loss here

        # generate a new batch of noise
        noise = np.random.normal(loc=0.0, scale=1,
                                 size=(nb_test, latent_size))

        # sample some labels from p_c and generate images from them
        sampled_labels = np.random.randint(0, nb_classes, nb_test)
        generated_images = generator.predict(
            [noise, sampled_labels.reshape((-1, 1))], verbose=False)

        X = np.concatenate((X_test, generated_images))
        y = np.array([1] * nb_test + [0] * nb_test)
        aux_y = np.concatenate((y_test, sampled_labels), axis=0)
        aux_y = np_utils.to_categorical(aux_y, nb_classes)

        # see if the discriminator can figure itself out...
        discriminator_test_loss = discriminator.evaluate(X, y, verbose=False)
        discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0)

        resnet_test_loss = classifier.evaluate(X, aux_y, verbose=False)
        resnet_train_loss = np.mean(np.array(epoch_resnet_loss), axis=0)

        # make new noise
        noise = np.random.normal(loc=0.0, scale=1,
                                 size=(2 * nb_test, latent_size))
        sampled_labels = np.random.randint(
            0, nb_classes, 2 * nb_test).reshape(-1, 1)
        aux_sampled_labels = np_utils.to_categorical(sampled_labels,
                                                     nb_classes)

        trick = np.ones(2 * nb_test)

        generator_test_loss = combined.evaluate(
            [noise, sampled_labels],
            [trick, aux_sampled_labels], verbose=False)

        generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0)

        # generate an epoch report on performance
        train_history['generator'].append(generator_train_loss)
        train_history['discriminator'].append(discriminator_train_loss)
        train_history['resnet'].append(resnet_train_loss)

        test_history['generator'].append(generator_test_loss)
        test_history['discriminator'].append(discriminator_test_loss)
        test_history['resnet'].append(resnet_test_loss)

        # save weights every epoch (second argument: overwrite existing files)
        generator.save_weights(
            'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True)
        discriminator.save_weights(
            'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True)
        classifier.save_weights(
            'params_resnet_epoch_{0:03d}.hdf5'.format(epoch), True)

    # persist the loss history; the context manager guarantees the file is
    # flushed and closed even if pickling fails
    with open('acgan-history.pkl', 'wb') as history_file:
        pickle.dump({'train': train_history, 'test': test_history},
                    history_file)

    # evaluate the test classification accuracy
    (loss, accuracy) = classifier.evaluate(X_test, Y_test,
                                           batch_size=batch_size, verbose=0)

    # show the accuracy on the testing set
    print("\n [INFO] Test accuracy: {:.2f}%".format(accuracy * 100))