# https://pytorch.org/tutorials/beginner/pytorch_with_examples.html import math import torch dtype = torch.float device = torch.device("cpu") # device = torch.device("cuda:0") # Uncomment this to run on GPU # Create random input and output data x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype) y = torch.sin(x) # Randomly initialize weights a = torch.randn((), device=device, dtype=dtype) b = torch.randn((), device=device, dtype=dtype) c = torch.randn((), device=device, dtype=dtype) d = torch.randn((), device=device, dtype=dtype) learning_rate = 1e-6 for t in range(2000): # Forward pass: compute predicted y y_pred = a + b * x + c * x ** 2 + d * x ** 3 # Compute and print loss loss = (y_pred - y).pow(2).sum().item() if t % 100 == 99: print(t, loss) # Backprop to compute gradients of a, b, c, d with respect to loss grad_y_pred = 2.0 * (y_pred - y) grad_a = grad_y_pred.sum() grad_b = (grad_y_pred * x).sum() grad_c = (grad_y_pred * x ** 2).sum() grad_d = (grad_y_pred * x ** 3).sum() # Update weights using gradient descent a -= learning_rate * grad_a b -= learning_rate * grad_b c -= learning_rate * grad_c d -= learning_rate * grad_d print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3') import matplotlib.pyplot as plt plt.figure(figsize=(10,6)) plt.plot(x.cpu().data.numpy(), y.cpu().data.numpy(),'-g') plt.plot(x.cpu().data.numpy(), y_pred.cpu().data.numpy(),'-b') plt.xlim(-math.pi, math.pi) plt.ylim(-1.1, 1.1) plt.grid() dtype = torch.float device = torch.device("mps") # Create Tensors to hold input and outputs. # By default, requires_grad=False, which indicates that we do not need to # compute gradients with respect to these Tensors during the backward pass. x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype) y = torch.sin(x) # Create random Tensors for weights. For a third order polynomial, we need # 4 weights: y = a + b x + c x^2 + d x^3 # Setting requires_grad=True indicates that we want to compute gradients with # respect to these Tensors during the backward pass. a = torch.randn((), dtype=dtype, requires_grad=True) b = torch.randn((), dtype=dtype, requires_grad=True) c = torch.randn((), dtype=dtype, requires_grad=True) d = torch.randn((), dtype=dtype, requires_grad=True) learning_rate = 1e-6 for t in range(2000): # Forward pass: compute predicted y using operations on Tensors. y_pred = a + b * x + c * x ** 2 + d * x ** 3 # Compute and print loss using operations on Tensors. # Now loss is a Tensor of shape (1,) # loss.item() gets the scalar value held in the loss. loss = (y_pred - y).pow(2).sum() if t % 100 == 99: print(t, loss.item()) # Use autograd to compute the backward pass. This call will compute the # gradient of loss with respect to all Tensors with requires_grad=True. # After this call a.grad, b.grad. c.grad and d.grad will be Tensors holding # the gradient of the loss with respect to a, b, c, d respectively. loss.backward() # Manually update weights using gradient descent. Wrap in torch.no_grad() # because weights have requires_grad=True, but we don't need to track this # in autograd. with torch.no_grad(): a -= learning_rate * a.grad b -= learning_rate * b.grad c -= learning_rate * c.grad d -= learning_rate * d.grad # Manually zero the gradients after updating weights a.grad = None b.grad = None c.grad = None d.grad = None print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3') # Create Tensors to hold input and outputs. x = torch.linspace(-math.pi, math.pi, 2000) y = torch.sin(x) # For this example, the output y is a linear function of (x, x^2, x^3), so # we can consider it as a linear layer neural network. Let's prepare the # tensor (x, x^2, x^3). p = torch.tensor([1, 2, 3]) xx = x.unsqueeze(-1).pow(p) # In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape # (3,), for this case, broadcasting semantics will apply to obtain a tensor # of shape (2000, 3) # Use the nn package to define our model as a sequence of layers. nn.Sequential # is a Module which contains other Modules, and applies them in sequence to # produce its output. The Linear Module computes output from input using a # linear function, and holds internal Tensors for its weight and bias. # The Flatten layer flatens the output of the linear layer to a 1D tensor, # to match the shape of `y`. model = torch.nn.Sequential( torch.nn.Linear(3, 1), torch.nn.Flatten(0, 1) ) # The nn package also contains definitions of popular loss functions; in this # case we will use Mean Squared Error (MSE) as our loss function. loss_fn = torch.nn.MSELoss(reduction='sum') learning_rate = 1e-6 for t in range(2000): # Forward pass: compute predicted y by passing x to the model. Module objects # override the __call__ operator so you can call them like functions. When # doing so you pass a Tensor of input data to the Module and it produces # a Tensor of output data. y_pred = model(xx) # Compute and print loss. We pass Tensors containing the predicted and true # values of y, and the loss function returns a Tensor containing the # loss. loss = loss_fn(y_pred, y) if t % 100 == 99: print(t, loss.item()) # Zero the gradients before running the backward pass. model.zero_grad() # Backward pass: compute gradient of the loss with respect to all the learnable # parameters of the model. Internally, the parameters of each Module are stored # in Tensors with requires_grad=True, so this call will compute gradients for # all learnable parameters in the model. loss.backward() # Update the weights using gradient descent. Each parameter is a Tensor, so # we can access its gradients like we did before. with torch.no_grad(): for param in model.parameters(): param -= learning_rate * param.grad # You can access the first layer of `model` like accessing the first item of a list linear_layer = model[0] # For linear layer, its parameters are stored as `weight` and `bias`. print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3') x = torch.linspace(-math.pi, math.pi, 2000) y = torch.sin(x) # Prepare the input tensor (x, x^2, x^3). p = torch.tensor([1, 2, 3]) xx = x.unsqueeze(-1).pow(p) # Use the nn package to define our model and loss function. model = torch.nn.Sequential( torch.nn.Linear(3, 1), torch.nn.Flatten(0, 1) ) loss_fn = torch.nn.MSELoss(reduction='sum') # Use the optim package to define an Optimizer that will update the weights of # the model for us. Here we will use RMSprop; the optim package contains many other # optimization algorithms. The first argument to the RMSprop constructor tells the # optimizer which Tensors it should update. learning_rate = 1e-3 optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate) for t in range(2000): # Forward pass: compute predicted y by passing x to the model. y_pred = model(xx) # Compute and print loss. loss = loss_fn(y_pred, y) if t % 100 == 99: print(t, loss.item()) # Before the backward pass, use the optimizer object to zero all of the # gradients for the variables it will update (which are the learnable # weights of the model). This is because by default, gradients are # accumulated in buffers( i.e, not overwritten) whenever .backward() # is called. Checkout docs of torch.autograd.backward for more details. optimizer.zero_grad() # Backward pass: compute gradient of the loss with respect to model # parameters loss.backward() # Calling the step function on an Optimizer makes an update to its # parameters optimizer.step() linear_layer = model[0] print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3') # import torch import torch.nn as nn import torch.nn.functional as F class Net(nn.Module): def __init__(self): super(Net, self).__init__() # 1 input image channel, 6 output channels, 5x5 square convolution # kernel self.conv1 = nn.Conv2d(1, 6, 5) self.conv2 = nn.Conv2d(6, 16, 5) # an affine operation: y = Wx + b self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5*5 from image dimension self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) def forward(self, x): # Max pooling over a (2, 2) window x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # If the size is a square, you can specify with a single number x = F.max_pool2d(F.relu(self.conv2(x)), 2) x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x net = Net() print(net) params = list(net.parameters()) print(len(params)) print(params[0].size()) # conv1's .weight input = torch.randn(1, 1, 32, 32) out = net(input) print(out) net.zero_grad() out.backward(torch.randn(1, 10)) output = net(input) target = torch.randn(10) # a dummy target, for example target = target.view(1, -1) # make it the same shape as output criterion = nn.MSELoss() loss = criterion(output, target) print(loss) print(loss.grad_fn) # MSELoss print(loss.grad_fn.next_functions[0][0]) # Linear print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU net.zero_grad() # zeroes the gradient buffers of all parameters print('conv1.bias.grad before backward') print(net.conv1.bias.grad) loss.backward() print('conv1.bias.grad after backward') print(net.conv1.bias.grad) import torch.optim as optim # create your optimizer optimizer = optim.SGD(net.parameters(), lr=0.01) # in your training loop: optimizer.zero_grad() # zero the gradient buffers output = net(input) loss = criterion(output, target) loss.backward() optimizer.step() # Does the update # https://towardsdatascience.com/handwritten-digit-mnist-pytorch-977b5338e627 import torch import matplotlib.pyplot as plt from time import time from torchvision import datasets, transforms from torch import nn, optim transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)), ]) trainset = datasets.MNIST('PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform) valset = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True) dataiter = iter(trainloader) images, labels = next(dataiter) print(images.shape) print(labels.shape) plt.imshow(images[0].numpy().squeeze(), cmap='gray_r'); figure = plt.figure() num_of_images = 60 for index in range(1, num_of_images + 1): plt.subplot(6, 10, index) plt.axis('off') plt.imshow(images[index].numpy().squeeze(), cmap='gray_r') input_size = 784 hidden_sizes = [128, 64] output_size = 10 model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]), nn.ReLU(), nn.Linear(hidden_sizes[0], hidden_sizes[1]), nn.ReLU(), nn.Linear(hidden_sizes[1], output_size), nn.LogSoftmax(dim=1)) print(model) criterion = nn.NLLLoss() images, labels = next(iter(trainloader)) images = images.view(images.shape[0], -1) logps = model(images) #log probabilities loss = criterion(logps, labels) #calculate the NLL loss print('Before backward pass: \n', model[0].weight.grad) loss.backward() print('After backward pass: \n', model[0].weight.grad) optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9) time0 = time() epochs = 15 for e in range(epochs): running_loss = 0 for images, labels in trainloader: # Flatten MNIST images into a 784 long vector images = images.view(images.shape[0], -1) # Training pass optimizer.zero_grad() output = model(images) loss = criterion(output, labels) #This is where the model learns by backpropagating loss.backward() #And optimizes its weights here optimizer.step() running_loss += loss.item() print(f"Training loss: {running_loss/len(trainloader)}") print("\nTraining Time (in minutes) =",(time()-time0)/60) correct_count, all_count = 0, 0 for images,labels in valloader: for i in range(len(labels)): img = images[i].view(1, 784) with torch.no_grad(): logps = model(img) ps = torch.exp(logps) probab = list(ps.numpy()[0]) pred_label = probab.index(max(probab)) true_label = labels.numpy()[i] if(true_label == pred_label): correct_count += 1 all_count += 1 print("Number Of Images Tested =", all_count) print("\nModel Accuracy =", (correct_count/all_count)) torch.save(model, './my_mnist_model.pt')