# Gradient Descent in R
# ---- Simulated regression data ----
# Fix the RNG seed so every run draws the same sample.
set.seed(1974)

# Sample size and the coefficient vector used to generate the response.
N <- 100
theta_true <- c(2, -1)

# N x 2 design matrix of standard-normal draws (filled column by column).
X <- matrix(rnorm(2 * N), nrow = N, ncol = 2)

# Response: linear signal plus Gaussian noise (sd = 0.5), stored as a plain
# numeric vector rather than an N x 1 matrix.
y <- drop(X %*% theta_true) + rnorm(N, sd = 0.5)
# ---- Squared-error loss surface ----
# Evaluate L(theta) = sum((X %*% theta - y)^2) on a 120 x 120 grid that spans
# theta_true +/- 2 in each coordinate, then draw its contour lines.
t1_seq <- seq(theta_true[1] - 2, theta_true[1] + 2, length.out = 120)
t2_seq <- seq(theta_true[2] - 2, theta_true[2] + 2, length.out = 120)

# The subtraction below recycles y down each column of the fitted-value
# matrix; refuse to continue if the lengths would make that recycling wrong.
stopifnot(length(y) == nrow(X))

# 2 x (n1 * n2) matrix of candidate parameter vectors. theta_1 varies fastest,
# so column (j - 1) * n1 + i holds c(t1_seq[i], t2_seq[j]).
theta_grid <- rbind(
  rep(t1_seq, times = length(t2_seq)),
  rep(t2_seq, each = length(t1_seq))
)

# One matrix product gives the residuals for every grid point at once
# (one column per candidate theta); column sums of squares are the losses.
grid_residuals <- X %*% theta_grid - y
L <- matrix(
  colSums(grid_residuals^2),
  nrow = length(t1_seq),
  ncol = length(t2_seq)
)

# L[i, j] is the loss at (t1_seq[i], t2_seq[j]), the layout contour() expects.
contour(
  t1_seq, t2_seq, L,
  nlevels = 20,
  xlab = expression(theta[1]),
  ylab = expression(theta[2])
)
# ---- Stochastic gradient descent ----
# One observation per update; every epoch visits all N observations once, in a
# freshly shuffled order. Row s + 1 of theta.sgd stores the iterate after s
# updates, and row 1 is the all-zero starting point.
epoch <- 10
delta <- 0.05
theta.sgd <- matrix(0, nrow = N * epoch + 1, ncol = 2)

theta_now <- theta.sgd[1, ]
for (pass in seq_len(epoch)) {
  visit_order <- sample.int(N)
  for (pos in seq_len(N)) {
    obs <- visit_order[pos]
    step <- (pass - 1) * N + pos

    # Gradient of the single-observation squared error (x' theta - y)^2.
    x_obs <- X[obs, ]
    residual <- drop(x_obs %*% theta_now) - y[obs]
    grad <- 2 * x_obs * residual

    theta_now <- theta_now - delta * grad
    theta.sgd[step + 1, ] <- theta_now
  }
}

# Trace of each coordinate across all single-sample updates.
plot(1:(N * epoch + 1), theta.sgd[, 1], type = "l")
plot(1:(N * epoch + 1), theta.sgd[, 2], type = "l")
# ---- Full-batch gradient descent ----
# Every update uses the gradient of the total squared error over all rows of
# X. Row k + 1 of theta.gd stores the iterate after k updates, and row 1 is
# the all-zero starting point.
epoch <- 50
delta <- 0.001
theta.gd <- matrix(0, nrow = epoch + 1, ncol = 2)

# The transpose does not change between iterations, so compute it once.
X_t <- t(X)
for (iter in seq_len(epoch)) {
  theta_now <- theta.gd[iter, ]
  residuals <- X %*% theta_now - y
  # Gradient of sum((X theta - y)^2) with respect to theta: 2 X'(X theta - y).
  grad <- 2 * X_t %*% residuals
  theta.gd[iter + 1, ] <- theta_now - delta * grad
}

# Trace of each coordinate across the batch updates.
plot(1:(epoch + 1), theta.gd[, 1], type = "l")
plot(1:(epoch + 1), theta.gd[, 2], type = "l")
# ---- Optimisation paths on the loss surface ----
# Redraw the loss contours, then overlay both parameter trajectories:
# SGD in red, full-batch gradient descent in black.
contour(
  x = t1_seq,
  y = t2_seq,
  z = L,
  nlevels = 20,
  xlab = expression(theta[1]),
  ylab = expression(theta[2])
)
# A two-column matrix is read by lines() as (x, y) pairs, one per row.
lines(theta.sgd, col = "red", lwd = 2)
lines(theta.gd, col = "black", lwd = 2)