[Interactive plot: a dense neural network with sliders for the number of layers and the number of neurons per layer]
Repeat until n passes (epochs) are made through the entire training set
[Interactive plot: training progress, with a slider for the number of iterations]
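As a concrete sketch of this training loop, the minibatch SGD below uses a single sigmoid unit and its cross-entropy gradient; the names X, y, W, b and the learning rate are illustrative assumptions, not the notebook's own code.

import numpy as np

def train(X, y, W, b, lr=0.1, batch_size=32, epochs=10):
    """Repeat until `epochs` passes have been made through the entire training set."""
    n = len(X)
    for epoch in range(epochs):
        order = np.random.permutation(n)              # reshuffle the examples every epoch
        for start in range(0, n, batch_size):
            batch = order[start:start + batch_size]
            Xb, yb = X[batch], y[batch]
            pred = 1 / (1 + np.exp(-(Xb @ W + b)))    # forward pass through a sigmoid unit
            err = pred - yb                           # gradient of cross-entropy w.r.t. the pre-activation
            W -= lr * Xb.T @ err / len(batch)         # average gradient step for the weights
            b -= lr * err.mean()                      # average gradient step for the bias
    return W, b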
y = sigmoid(np.dot(X, W) + b)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def add(x, y):
    return x + y
>>> np.array([[1,2],[3,4]]) + np.array([10,20])
array([[11, 22],
       [13, 24]])
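Putting this together with the sigmoid layer above: broadcasting is what lets a single bias vector be added to every row of np.dot(X, W). The shapes below are illustrative assumptions, not taken from the notebook.

import numpy as np

X = np.random.rand(4, 3)         # 4 examples with 3 input features
W = np.random.rand(3, 2)         # weights from 3 inputs to 2 neurons
b = np.array([0.1, -0.2])        # one bias per neuron, broadcast over all 4 rows

y = sigmoid(np.dot(X, W) + b)    # shape (4, 2): one activation per example and neuron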
[Interactive plot: activation functions, selectable from sigmoid, tanh, relu, and leaky_relu]
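For reference, the remaining activation functions in this comparison can be written in a few lines of NumPy; the leaky ReLU slope of 0.01 is a common default assumed here, not a value from the text.

import numpy as np

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)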
[Interactive plot: effect of the number of layers (1–4)]
[Interactive plot: output activation functions, selectable from sigmoid, softmax, and none]
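Of these output activations, softmax has not been defined above. A minimal NumPy sketch is shown below; subtracting the row-wise maximum is a standard numerical-stability trick assumed here, not something stated in the text.

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))  # shift by the max for numerical stability
    return e / e.sum(axis=-1, keepdims=True)            # normalize to a probability distribution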
[Interactive plots: comparison of optimizers, with a slider for the number of iterations and dropdowns to select the optimizers]
Adam: RMSProp + momentum. Adds a moving average of the gradients as well ($\gamma_2$ = momentum): $$m_{i,(s)} = \gamma_2\, m_{i,(s-1)} + (1-\gamma_2)\, \hat{g}_{i,(s)}$$
Adamax: Same as Adam, but uses max() instead of a moving average: $u_{i,(s)} = \max(\gamma\, u_{i,(s-1)}, |\nabla \mathcal{L}(w_{i,(s)})|)$ $$w_{i,(s+1)} = w_{i,(s)} - \frac{\eta}{u_{i,(s)}} \hat{g}_{i,(s)}$$
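To make these update rules concrete, here is a minimal NumPy sketch of a single Adam step. The hyperparameter defaults and the bias-correction terms are standard Adam choices assumed here; gamma denotes the squared-gradient decay and gamma2 the gradient momentum, matching the notation above.

import numpy as np

def adam_step(w, grad, m, v, s, eta=0.001, gamma=0.999, gamma2=0.9, eps=1e-8):
    """One Adam update of parameters w given gradient grad at step s (s starts at 1)."""
    m = gamma2 * m + (1 - gamma2) * grad             # moving average of gradients (momentum)
    v = gamma * v + (1 - gamma) * grad**2            # moving average of squared gradients (RMSProp)
    m_hat = m / (1 - gamma2**s)                      # bias correction of the running averages
    v_hat = v / (1 - gamma**s)
    w = w - eta * m_hat / (np.sqrt(v_hat) + eps)     # scaled parameter update
    return w, m, v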
[Interactive plot: optimizer comparison, with a slider for the number of iterations]
[Interactive plot: training progress, with a slider for the number of iterations]
from tensorflow.keras import models, layers
network = models.Sequential()
network.add(layers.Dense(512, activation='relu', kernel_initializer='he_normal', input_shape=(28 * 28,)))
network.add(layers.Dense(512, activation='relu', kernel_initializer='he_normal'))
network.add(layers.Dense(10, activation='softmax'))
network.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 512) 401920 dense_1 (Dense) (None, 512) 262656 dense_2 (Dense) (None, 10) 5130 ================================================================= Total params: 669,706 Trainable params: 669,706 Non-trainable params: 0 _________________________________________________________________
# Shorthand
network.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Detailed
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.metrics import CategoricalAccuracy

network.compile(loss=CategoricalCrossentropy(label_smoothing=0.01),
                optimizer=RMSprop(learning_rate=0.001, momentum=0.0),
                metrics=[CategoricalAccuracy()])
from tensorflow.keras.utils import to_categorical

X = X.astype('float32') / 255    # scale pixel values to [0, 1]
X = X.reshape((60000, 28 * 28))  # flatten each 28x28 image into a 784-dimensional vector
y = to_categorical(y)            # one-hot encode the class labels
history = network.fit(X_train, y_train, epochs=3, batch_size=32);
Epoch 1/3
1875/1875 [==============================] - 24s 13ms/step - loss: 0.4331 - accuracy: 0.8529
Epoch 2/3
1875/1875 [==============================] - 25s 13ms/step - loss: 0.4242 - accuracy: 0.8568
Epoch 3/3
1875/1875 [==============================] - 26s 14ms/step - loss: 0.4183 - accuracy: 0.8573
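The History object returned by fit records the loss and metrics for every epoch. A minimal sketch of plotting them is shown below; the use of matplotlib is an assumption, not something the notebook shows.

import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['accuracy'], label='training accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()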
We can now call predict to generate predictions, and evaluate the trained model on the entire test set.
network.predict(X_test)
test_loss, test_acc = network.evaluate(X_test, y_test)
[0.0240177 0.0001167 0.4472437 0.0056629 0.057807 0.000094 0.4632739 0.0000267 0.0017463 0.0000112]
313/313 [==============================] - 2s 7ms/step - loss: 0.3845 - accuracy: 0.8636
Test accuracy: 0.8636000156402588
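Each row returned by predict is a probability distribution over the 10 classes, as in the vector above; the predicted label is simply the index of the largest probability. A short sketch (the variable names are illustrative):

import numpy as np

probabilities = network.predict(X_test)              # one probability per class, per test example
predicted_labels = np.argmax(probabilities, axis=1)  # pick the most likely class for each example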
from tensorflow.keras import callbacks

earlystop = callbacks.EarlyStopping(monitor='val_loss', patience=3)
model.fit(x_train, y_train, validation_split=0.2, epochs=25, batch_size=512, callbacks=[earlystop])  # validation data is needed to monitor val_loss
from tensorflow.keras import regularizers

network = models.Sequential()
network.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=(28 * 28,)))
network.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
network = models.Sequential()
network.add(layers.Dense(256, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dropout(0.5))  # randomly drop 50% of this layer's activations during training
network.add(layers.Dense(32, activation='relu'))
network.add(layers.Dropout(0.5))
network.add(layers.Dense(10, activation='softmax'))
network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.BatchNormalization())  # normalize the previous layer's activations per batch
network.add(layers.Dropout(0.5))
network.add(layers.Dense(256, activation='relu'))
network.add(layers.BatchNormalization())
network.add(layers.Dropout(0.5))
network.add(layers.Dense(64, activation='relu'))
network.add(layers.BatchNormalization())
network.add(layers.Dropout(0.5))
network.add(layers.Dense(32, activation='relu'))
network.add(layers.BatchNormalization())
network.add(layers.Dropout(0.5))
network.add(layers.Dense(10, activation='softmax'))
from tensorflow.keras.optimizers import Adam

def make_model(hp):
    m = models.Sequential()
    m.add(layers.Dense(units=hp.Int('units', min_value=32, max_value=512, step=32), activation='relu'))
    m.compile(optimizer=Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
              loss='categorical_crossentropy', metrics=['accuracy'])
    return m
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

clf = KerasClassifier(make_model)
grid = GridSearchCV(clf, param_grid=param_grid, cv=3)
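A usage sketch, assuming that param_grid (not shown here) maps parameter names such as epochs or batch_size to candidate values, and that X_train and y_train are the arrays prepared earlier:

grid.fit(X_train, y_train)                     # fits one model per parameter combination and CV fold
print(grid.best_params_, grid.best_score_)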
from kerastuner.tuners import RandomSearch
tuner = RandomSearch(make_model, objective='val_accuracy', max_trials=5)
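Once the tuner is defined, the search is run with the same signature as fit; the epoch count and validation split below are illustrative assumptions.

tuner.search(X_train, y_train, epochs=5, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]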