Boosting Methods and Neural Networks in Python
In this project, 200 training examples $X_i\in[0,1]^2$ are used, with responses $Y_i = X_{i1}^2+X_{i2}^2+\epsilon_i$, where $\epsilon_i \sim N(0, 0.01^2)$. The testing examples are generated on an equally spaced grid of 625 points in $[0,1]^2$ to approximate the true regression surface.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
Regression: Matching pursuit
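The function below builds an additive model of axis-aligned cuts (regression stumps) by greedy matching pursuit: each pass fits the current residual with the single cut that most reduces the squared error, and a cut is kept only if the reduction exceeds the penalty $\lambda$. In effect it greedily minimizes
$$\sum_{i=1}^{n}\Big(Y_i-\sum_{k=1}^{K}\big[c_{k,1}\,\mathbf{1}\{X_{i j_k}\le t_k\}+c_{k,2}\,\mathbf{1}\{X_{i j_k}> t_k\}\big]\Big)^2+\lambda K$$
over the number of cuts $K$, the split variables $j_k$, the thresholds $t_k$, and the left/right levels $c_{k,1},c_{k,2}$, stopping when no additional cut lowers the squared error by more than $\lambda$.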
def matching_pursuit(x_train, y_train, lamb, x_test, y_test):
    n, p = x_train.shape
    # init
    x, res = x_train, y_train
    loss, L = np.sum(res**2), np.sum((res - np.mean(res))**2)
    cuts = []
    pnlty, cut = 0.0, None
    # greedily add one cut per pass while the penalized loss keeps decreasing
    while loss > L:
        loss = L
        penalty = pnlty
        cuts.append(cut)
        # fit the current residual with the best single axis-aligned cut
        y = res.copy()
        for j in range(p):
            for i in range(n-1):
                idx = x[:, j] <= x[i, j]
                if idx.all():
                    continue  # skip degenerate cuts with an empty right side
                c = [np.mean(y[idx]), np.mean(y[~idx])]   # left/right means
                f = np.where(idx, c[0], c[1])             # piecewise-constant fit
                new_res = y - f
                new_pnlty = penalty + lamb                # lambda per accepted cut
                new_L = np.sum(new_res**2) + new_pnlty
                if new_L < L:
                    res, L, pnlty = new_res.copy(), new_L, new_pnlty
                    cut = [c, x[i, j], j]
    del cuts[0]   # drop the initial placeholder (None)
    train_loss = (loss - penalty)/n
    # test: sum the fitted stumps over the test points
    nt = len(y_test)
    y_hat = np.zeros(nt)
    for c, cut, j in cuts:
        y_hat += c[0]*(x_test[:, j] <= cut) + c[1]*(x_test[:, j] > cut)
    test_loss = np.sum((y_hat - y_test)**2)/nt
    return y_hat, cuts, train_loss, test_loss
### training data
np.random.seed(7)
n = 200
sigma = 0.01
x_train = np.random.uniform(0,1,[n,2])
y_train = np.sum(x_train**2, axis=1)+np.random.normal(0,1,n)*sigma
## testing data
nt = 25
x1, x2 = np.array(range(1,nt+1))/nt, np.array(range(1,nt+1))/nt
x_test = np.array(np.meshgrid(x1, x2)).reshape((2, nt**2)).transpose()
y_test = np.sum(x_test**2, axis=1)+np.random.normal(0,1,nt**2)*sigma
lambs = np.array(range(1,11))/20
train_ls = []
test_ls = []
for l in lambs:
    _, _, train_loss, test_loss = matching_pursuit(x_train, y_train, l, x_test, y_test)
    train_ls.append(train_loss)
    test_ls.append(test_loss)
### choose model with the smallest testing error
lamb_optimal = lambs[np.argmin(test_ls)]
lamb_optimal
plt.figure()
plt.title('Training and testing loss for matching pursuit')
plt.xlabel('Penalty: lambda')
plt.ylabel('Error')
plt.plot(lambs, train_ls, label='training')
plt.plot(lambs, test_ls, label='testing')
plt.legend(loc='upper right')
plt.savefig("1-1.png")
y_hat, cuts, train_loss, test_loss = matching_pursuit(x_train, y_train, lamb_optimal, x_test, y_test)
plt.close()
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in newer Matplotlib
ax.scatter(x_train[:,0], x_train[:,1], y_train, label = "train", c='orange')
ax.plot_surface(x1.reshape(1,nt), x2.reshape(nt,1), y_hat.reshape(nt,nt))
plt.savefig("1-2.png")
With regularization on the number of cuts, it can be seen that the smaller the regularization, the more cuts are placed in the X space. Since the noise in the training data is very small, more cuts give a more precise fit, and the fitted surface is very close to the truth $f(x)=x_1^2+x_2^2$.
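A quick way to see the effect of $\lambda$ on model size is to print how many cuts are kept for a few penalty values (an illustrative sketch; the exact counts depend on the random seed):
for l in [0.05, 0.2, 0.5]:
    _, cuts_l, _, _ = matching_pursuit(x_train, y_train, l, x_test, y_test)
    print("lambda = %.2f: %d cuts" % (l, len(cuts_l)))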
Classification: AdaBoost
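The implementation below uses decision stumps as weak learners and follows the standard AdaBoost updates: at round $t$ the stump $h_t$ with the smallest weighted error is selected, its vote is weighted by $\beta_t$, and the example weights $D_t$ are re-weighted and renormalized,
$$\varepsilon_t=\sum_{i=1}^{n}D_t(i)\,\mathbf{1}\{h_t(X_i)\ne Y_i\},\qquad \beta_t=\tfrac{1}{2}\log\frac{1-\varepsilon_t}{\varepsilon_t},\qquad D_{t+1}(i)\propto D_t(i)\,e^{-\beta_t Y_i h_t(X_i)},$$
and the final classifier is $\hat F_T(x)=\operatorname{sign}\big(\sum_{t=1}^{T}\beta_t h_t(x)\big)$.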
def adaboost(x_train, yc_train, x_test, yc_test, num_iteration=200):
    n, p = x_train.shape
    nt = x_test.shape[0]
    x, y, y_test = x_train, yc_train.reshape(n, 1), yc_test.reshape(nt, 1)
    # init
    clfs, err_trains, err_tests = [], [], []
    D = np.ones((n, 1))/n
    train_score, test_score = np.zeros((n, 1)), np.zeros((nt, 1))
    # boosting rounds
    for it in range(num_iteration):
        eps = 1
        # search all axis-aligned stumps for the smallest weighted error
        for j in range(p):
            for i in range(n):
                h = np.ones((n, 1))
                idx = x[:, j] <= x[i, j]
                c = -1
                h[idx] = -1
                mistake = (y != h)
                if np.sum(mistake) > n/2:   # flip the stump if it errs on more than half
                    c = 1
                    h = -h
                    mistake = (y != h)
                eps_new = np.sum(D*mistake)
                if eps_new < eps:
                    eps = eps_new
                    classifier = (c, x[i, j], j, eps)
                    h_est = h.copy()
        beta = 0.5*np.log((1 - eps)/eps)
        score = h_est*beta
        train_score += score
        err_trains.append(np.mean(((train_score >= 0)*2 - 1) != y))
        # update D
        D = D*np.exp(-y*score)
        D = D/np.sum(D)  # n*1
        # test
        c, cut, j, _ = classifier
        h_hat = c*(x_test[:, j] <= cut) - c*(x_test[:, j] > cut)
        score = h_hat.reshape((nt, 1))*beta
        test_score += score
        err_tests.append(np.mean(((test_score >= 0)*2 - 1) != y_test))
        clfs.append(classifier)
    return test_score, clfs, err_trains, err_tests
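The function only returns scores on the supplied test set; a small helper like the following (hypothetical, not part of the original notebook) rebuilds predictions for new points from the returned stump list, mirroring the scoring done inside adaboost:
def adaboost_predict(clfs, x_new):
    # sum the weighted stump votes and take the sign, as in adaboost() above
    score = np.zeros(x_new.shape[0])
    for c, cut, j, eps in clfs:
        beta = 0.5*np.log((1 - eps)/eps)
        score += beta*(c*(x_new[:, j] <= cut) - c*(x_new[:, j] > cut))
    return (score >= 0)*2 - 1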
yc_train = (y_train-0.8>=0)*2-1
yc_test = (y_test-0.8>=0)*2-1
result, clfs, err_trains, err_tests = adaboost(x_train, yc_train, x_test, yc_test, num_iteration=30)
plt.close()
plt.title('Training and testing error for adaboost')
plt.xlabel('num_classifiers')
plt.ylabel('Error')
plt.plot(err_trains, label='training')
plt.plot(err_tests, label='testing')
plt.legend(loc='upper right')
plt.savefig("2-1.png")
plt.close()
plt.title('Scatter plot with classification boundary')
plt.xlabel('x1')
plt.ylabel('x2')
plt.scatter(x_train[yc_train==1,0], x_train[yc_train==1,1], c='b', marker="+", label = 'positive')
plt.scatter(x_train[yc_train==-1,0], x_train[yc_train==-1,1], c='b', marker=".", label = 'negative')
plt.legend()
plt.contour(x1, x2, result.reshape((nt, nt)), [0])  # decision boundary: aggregated score = 0
plt.savefig("2-3.png")
It can be observed that the error rate of AdaBoost does not decrease monotonically, but in general it decreases as more classifiers are included. The 30 weak classifiers trained above give a quite satisfactory classification boundary.
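For a closer look at what was learned, the returned stump list can be inspected directly (an illustrative sketch; the printed values depend on the run):
for t, (c, cut, j, eps) in enumerate(clfs[:5]):
    print("round %d: split x%d at %.3f, sign %d, weighted error %.3f" % (t, j+1, cut, c, eps))
print("final training error:", err_trains[-1], ", final test error:", err_tests[-1])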
Neural Network
After tuning the hyperparameters, a learning rate of 0.001 and an initialization scale of 0.01 are chosen, with which an MSE / error rate below 0.05 can be achieved within 200 training epochs.
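The network has one hidden layer with a ReLU non-linearity and a linear output,
$$h = \mathrm{ReLU}(XW + b), \qquad \hat f = h\,\beta,$$
and it is trained by gradient descent on the squared loss $\tfrac{1}{2}\lVert Y-\hat f\rVert^2$. Since the code propagates dout = y_train - score, which is the negative of $\partial L/\partial \hat f$, the parameters are updated by adding learning_rate times the back-propagated quantities.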
# one hidden layer and ReLU non-linearity
def forward(x, w, b, relu=True):
    s = np.dot(x, w) + b    # x: n*p, w: p*d, b: d*1, s: n*d
    h = s.copy()            # n*d
    if relu:
        h[s < 0] = 0
    cache = (x, w, b, s)
    return h, cache

def backward(dout, cache, relu=True):
    x, w, b, s = cache
    ds = dout.copy()        # n*d
    if relu:
        ds[s < 0] = 0
    dx = np.dot(ds, w.T)    # n*p
    dw = np.dot(x.T, ds)    # p*d
    db = np.sum(ds, axis=0) # d*1
    return dx, dw, db
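A quick way to validate backward() is a finite-difference check against forward() (a small sketch, not part of the original notebook; agreement is approximate near ReLU kinks):
rng = np.random.default_rng(0)
x0, w0, b0 = rng.standard_normal((4, 3)), rng.standard_normal((3, 2)), rng.standard_normal(2)
out, cache = forward(x0, w0, b0, relu=True)
dout0 = rng.standard_normal(out.shape)
_, dw, _ = backward(dout0, cache, relu=True)
eps = 1e-6
w1 = w0.copy(); w1[0, 0] += eps
num_grad = (np.sum(forward(x0, w1, b0)[0]*dout0) - np.sum(out*dout0))/eps
print(num_grad, dw[0, 0])   # should agree to several decimals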
def my_NN(x_train, y_train, x_test, y_test, num_hidden=5, learning_rate=1e-5, num_epoch=300, scale=1e-2, clf=False):
    n, p = x_train.shape
    y_train = y_train.reshape(n, 1)
    y_test = y_test.reshape(len(y_test), 1)
    # init
    w = np.random.standard_normal((p, num_hidden))*scale
    b = np.random.standard_normal(num_hidden)*scale
    beta = np.random.standard_normal((num_hidden, 1))*scale   # d*1
    err_trains, err_tests = [], []
    for it in range(num_epoch):
        # forward pass
        h, cache1 = forward(x_train, w, b, relu=True)     # n*d
        score, cache2 = forward(h, beta, 0, relu=False)   # regression output, n*1
        if clf:
            f = (np.tanh(score) >= 0)*2 - 1               # tanh keeps the sign of the score
            if y_train.min() == 0:
                f = 1/(1 + np.exp(-score)) >= 0.5         # sigmoid for 0/1 labels
            err = np.mean(y_train != f)
        else:
            f = score
            err = np.mean((y_train - f)**2)
        err_trains.append(err)
        # testing
        f_hat, _ = forward(forward(x_test, w, b)[0], beta, 0, relu=False)
        if clf:
            f_hat = (np.tanh(f_hat) >= 0)*2 - 1
            if y_test.min() == 0:
                f_hat = 1/(1 + np.exp(-f_hat)) >= 0.5
            err = np.mean(y_test != f_hat)
        else:
            err = np.mean((y_test - f_hat)**2)
        err_tests.append(err)
        # backprop: dout = -(dL/dscore) for the squared loss, so parameters are updated by addition
        dout = y_train - score
        dh, dbeta, _ = backward(dout, cache2, relu=False)
        _, dw, db = backward(dh, cache1)
        beta = beta + learning_rate*dbeta
        w = w + learning_rate*dw
        b = b + learning_rate*db
    par = (w, b, beta)
    return par, f_hat, err_trains[2:], err_tests   # the first two recorded training errors are dropped
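The trained parameters are returned as par = (w, b, beta); a small helper like the following (hypothetical, not part of the original notebook) turns them into predictions for new inputs:
def nn_predict(par, x_new):
    w, b, beta = par
    h, _ = forward(x_new, w, b, relu=True)      # hidden layer
    return forward(h, beta, 0, relu=False)[0]   # raw score; threshold at 0 for +/-1 classification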
# regression
par, f_hat, err_trains, err_tests = my_NN(x_train, y_train, x_test,y_test, num_hidden=20, \
learning_rate=1e-3, num_epoch=300, scale=1e-2, clf=False)
plt.close()
plt.title('Training and testing MSE for NN regression')
plt.xlabel('num_epochs')
plt.ylabel('MSE')
plt.plot(err_trains, label='training')
plt.plot(err_tests, label='testing')
plt.legend(loc='upper right')
plt.savefig("3-1.png")
plt.close()
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in newer Matplotlib
ax.scatter(x_train[:,0], x_train[:,1], y_train, label = "train", c='orange')
ax.plot_surface(x1.reshape(1,nt), x2.reshape(nt,1), f_hat.reshape(nt,nt))
plt.savefig("3-2.png")
# classification
par, f_hat, err_trains, err_tests = my_NN(x_train, yc_train, x_test, yc_test, num_hidden=10, \
learning_rate=1e-3, num_epoch=200, scale=1e-2, clf=True)
plt.close()
plt.title('Training and testing error for NN classification')
plt.xlabel('num_epochs')
plt.ylabel('error')
plt.plot(err_trains, label='training')
plt.plot(err_tests, label='testing')
plt.legend(loc='upper right')
plt.savefig("3-3.png")
plt.close()
plt.title('Scatter plot with classification boundary')
plt.xlabel('x1')
plt.ylabel('x2')
plt.scatter(x_train[yc_train==1,0], x_train[yc_train==1,1], c='b', marker="+", label = 'positive training')
plt.scatter(x_train[yc_train==-1,0], x_train[yc_train==-1,1], c='b', marker=".", label = 'negative training')
plt.legend()
plt.contour(x1, x2, f_hat.reshape(nt,nt), [0])
plt.savefig("3-4.png")