snrazavi
diff --git a/‎MLiP-week08/08 Non-Linear Classification (ANN).ipynb
+1,010 b/‎MLiP-week08/08 Non-Linear Classification (ANN).ipynb
+1,010
diff --git a/‎MLiP-week08/data_utils.py
+79 b/‎MLiP-week08/data_utils.py
+79
diff --git a/‎MLiP-week08/imgs/08-2-three-layer-NN.jpg
39.9 KB b/‎MLiP-week08/imgs/08-2-three-layer-NN.jpg
39.9 KB
diff --git a/‎MLiP-week08/imgs/8-1NN.jpg
29.6 KB b/‎MLiP-week08/imgs/8-1NN.jpg
29.6 KB
diff --git a/‎MLiP-week08/imgs/8-2Different_update_rules.gif
591 KB b/‎MLiP-week08/imgs/8-2Different_update_rules.gif
591 KB
diff --git a/‎MLiP-week08/imgs/8-3Batch_normalization.jpg
140 KB b/‎MLiP-week08/imgs/8-3Batch_normalization.jpg
140 KB
diff --git a/‎MLiP-week08/imgs/8-4Dropout.jpg
131 KB b/‎MLiP-week08/imgs/8-4Dropout.jpg
131 KB
diff --git a/‎MLiP-week08/layers.py
+139 b/‎MLiP-week08/layers.py
+139
@@ -0,0 +1,79 @@
+from six.moves import cPickle as pickle
+import numpy as np
+import os
+from scipy.misc import imread
+import platform
+
+def load_pickle(f):
+    version = platform.python_version_tuple()
+    if version[0] == '2':
+        return  pickle.load(f)
+    elif version[0] == '3':
+        return  pickle.load(f, encoding='latin1')
+    raise ValueError("invalid python version: {}".format(version))
+
+def load_CIFAR_batch(filename):
+  """ load single batch of cifar """
+  with open(filename, 'rb') as f:
+    datadict = load_pickle(f)
+    X = datadict['data']
+    Y = datadict['labels']
+    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
+    Y = np.array(Y)
+    return X, Y
+
+def load_CIFAR10(ROOT):
+  """ load all of cifar """
+  xs = []
+  ys = []
+  for b in range(1,6):
+    f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
+    X, Y = load_CIFAR_batch(f)
+    xs.append(X)
+    ys.append(Y)    
+  Xtr = np.concatenate(xs)
+  Ytr = np.concatenate(ys)
+  del X, Y
+  Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
+  return Xtr, Ytr, Xte, Yte
+
+
+def get_CIFAR10_data(cifar10_dir, num_training=49000, num_validation=1000, num_test=1000,
+                     subtract_mean=True):
+    """
+    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
+    it for classifiers. These are the same steps as we used for the SVM, but
+    condensed to a single function.
+    """
+    # Load the raw CIFAR-10 data
+    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
+        
+    # Subsample the data
+    mask = list(range(num_training, num_training + num_validation))
+    X_val = X_train[mask]
+    y_val = y_train[mask]
+    mask = list(range(num_training))
+    X_train = X_train[mask]
+    y_train = y_train[mask]
+    mask = list(range(num_test))
+    X_test = X_test[mask]
+    y_test = y_test[mask]
+
+    # Normalize the data: subtract the mean image
+    if subtract_mean:
+      mean_image = np.mean(X_train, axis=0)
+      X_train -= mean_image
+      X_val -= mean_image
+      X_test -= mean_image
+    
+    # Transpose so that channels come first
+    X_train = X_train.transpose(0, 3, 1, 2).copy()
+    X_val = X_val.transpose(0, 3, 1, 2).copy()
+    X_test = X_test.transpose(0, 3, 1, 2).copy()
+
+    # Package data into a dictionary
+    return {
+      'X_train': X_train, 'y_train': y_train,
+      'X_val': X_val, 'y_val': y_val,
+      'X_test': X_test, 'y_test': y_test,
+    }
@@ -0,0 +1,139 @@
+import numpy as np
+
+
+def affine_forward(x, W, b):
+    """
+    A linear mapping from inputs to scores.
+    
+    Inputs:
+        - x: input matrix (N, d_1, ..., d_k)
+        - W: weigh matrix (D, C)
+        - b: bias vector  (C, )
+    
+    Outputs:
+        - out: output of linear layer (N, C)
+    """
+    x2d = np.reshape(x, (x.shape[0], -1))  # convert 4D input matrix to 2D    
+    out = np.dot(x2d, W) + b               # linear transformation
+    cache = (x, W, b)                      # keep for backward step (stay with us)
+    return out, cache
+
+
+def affine_backward(dout, cache):
+    """
+    Computes the backward pass for an affine layer.
+
+    Inputs:
+        - dout: Upstream derivative, of shape (N, C)
+        - cache: Tuple of:
+            - x: Input data, of shape (N, d_1, ... d_k)
+            - w: Weights, of shape (D, C)
+            - b: biases, of shape (C,)
+
+    Outputs:
+        - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
+        - dw: Gradient with respect to w, of shape (D, C)
+        - db: Gradient with respect to b, of shape (C,)
+    """
+    x, w, b = cache
+    x2d = np.reshape(x, (x.shape[0], -1))
+
+    # compute gradients
+    db = np.sum(dout, axis=0)
+    dw = np.dot(x2d.T, dout)
+    dx = np.dot(dout, w.T)
+
+    # reshape dx to match the size of x
+    dx = dx.reshape(x.shape)
+    
+    return dx, dw, db
+
+def relu_forward(x):
+    """Forward pass for a layer of rectified linear units.
+
+    Inputs:
+        - x: a numpy array of any shape
+
+    Outputs:
+        - out: output of relu, same shape as x
+        - cache: x
+    """
+    cache = x
+    out = np.maximum(0, x)
+    return out, cache
+
+def relu_backward(dout, cache):
+    """Backward pass for a layer of rectified linear units.
+
+    Inputs:
+        - dout: upstream derevatives, of any shape
+        - cache: x, same shape as dout
+
+    Outputs:
+        - dx: gradient of loss w.r.t x
+    """
+    x = cache
+    dx = dout * (x > 0)
+    return dx
+
+def svm_loss(scores, y):
+    """
+    Fully-vectorized implementation of SVM loss function.
+
+    Inputs:
+        - scores: scores for all training data (N, C)
+        - y: correct labels for the training data of shape (N,)
+
+    Outputs:
+       - loss: data loss plus L2 regularization loss
+       - grads: graidents of loss w.r.t scores
+    """
+
+    N = scores.shape[0]
+
+    # Compute svm data loss
+    correct_class_scores = scores[range(N), y]
+    margins = np.maximum(0.0, scores - correct_class_scores[:, None] + 1.0)
+    margins[range(N), y] = 0.0
+    loss = np.sum(margins) / N
+
+    # Compute gradient off loss function w.r.t. scores
+    num_pos = np.sum(margins > 0, axis=1)
+    dscores = np.zeros(scores.shape)
+    dscores[margins > 0] = 1
+    dscores[range(N), y] -= num_pos
+    dscores /= N
+
+    return loss, dscores
+
+
+def softmax_loss(scores, y):
+    """
+    Softmax loss function, fully vectorized implementation.
+
+    Inputs have dimension D, there are C classes, and we operate on minibatches
+    of N examples.
+
+    Inputs:
+        - scores: A numpy array of shape (N, C).
+        - y: A numpy array of shape (N,) containing training labels;
+
+    Outputs:
+        - loss as single float
+        - gradient with respect to scores
+    """
+    N = scores.shape[0]  # number of input data
+
+    # compute data loss
+    shifted_logits = scores - np.max(scores, axis=1, keepdims=True)
+    Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
+    log_probs = shifted_logits - np.log(Z)
+    probs = np.exp(log_probs)
+    loss = -np.sum(log_probs[range(N), y]) / N
+
+    # Compute gradient of loss function w.r.t. scores
+    dscores = probs.copy()
+    dscores[range(N), y] -= 1
+    dscores /= N
+    
+    return loss, dscores