# Fastbook

YouTube Playlist: https://youtube.com/playlist?list=PLD80i8An1OEHdlrBwa7mKFaHX9tH86b93

Notes from the Fastbook.

# Overview

The simplest equation that can power an AI is:

w*x + b = y
# Chapter 1: Intro

# Import all the useful utilities to train a visual learning model
from fastai.vision.all import *

# Download and extract the PETS dataset, then point `path` at its 'images' subdirectory
path = untar_data(URLs.PETS)/'images'

# Function to process a filename and return the label
def is_cat(x):
  """Label a filename: True when its first character is uppercase.

  Used as the `label_func` for the data loaders, so an uppercase first
  letter in the filename is what marks an image as a cat.
  """
  first_char = x[0]
  return first_char.isupper()

# A loader that can load data and label them by processing filenames
data_loaders = ImageDataLoaders.from_name_func(
  path,  # path to the dataset
  get_image_files(path),  # The input data.
  valid_pct=0.2,  # Set aside 20% of the data for validation.
  seed=42,  # Fixed seed so the random validation split is reproducible across runs.
  label_func=is_cat,  # Function to process labels from filename
  item_tfms=Resize(224),  # Resize the images to the same size for processing them efficiently in GPU.
  # item_tfms=RandomResizedCrop(128, min_scale=0.35),  # Alternative
)

# Use a CNN implementation with 34 layers
architecture = resnet34

# Use predefined function error_rate for validating and measuring the quality
metric = error_rate

# Initialize the vision learner model from the loaders, architecture and metric above
learner = vision_learner(data_loaders, architecture, metrics=metric)

# Fine-tune the pre-trained model in 1 epoch (iteration)
learner.fine_tune(1)
# Chapter 2: Production

# Chapter 3: Ethics

Computers can be (too) powerful. Be responsible.

# Chapter 4: MNIST basics

Typical deep learning flow:

flowchart LR; init-->predict-->loss-->gradient-->step-->stop; step--repeat-->predict

Code for implementing an (unoptimized) linear learner that classifies images of 3s and 7s, and for training it:

def sigmoid(x):
    """Squash each element of tensor `x` into the open interval (0, 1).

    Computes 1 / (1 + e^-x), so 0 maps to 0.5, large positive values
    approach 1 and large negative values approach 0.
    """
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

def batch_accuracy(predictions_batch, targets_batch):
    """Return the fraction of a batch that was classified correctly.

    A prediction counts as a "3" when its sigmoid is strictly above 0.5;
    it is correct when that verdict equals the target (1 for a 3, 0 otherwise).
    """
    # Same as sigmoid(predictions_batch) > 0.5
    predicted_three = predictions_batch.sigmoid() > 0.5
    is_correct = predicted_three == targets_batch
    return is_correct.float().mean()

def mnist_loss(predictions, targets):
    """Return the mean distance between the predictions and the correct answers.

    Predictions are first passed through sigmoid. Where the target is 1 the
    loss is `1 - prediction`, otherwise it is the prediction itself, so a
    confident correct answer costs close to 0 and a confident wrong one close to 1.
    """
    probabilities = predictions.sigmoid()

    # Vectorised equivalent of
    # [(1 - p) if t == 1 else p for p, t in zip(probabilities, targets)]
    # that runs as a single tensor operation (and on the GPU when the tensors live there).
    per_item_loss = torch.where(targets == 1, 1 - probabilities, probabilities)

    return per_item_loss.mean()

def init_params(size, std=1.0):
    """Create randomly initialised parameters that PyTorch tracks gradients for.

    Values are drawn with `torch.randn(size)` and scaled by `std`.
    """
    # requires_grad_() tells PyTorch to track d(loss) / d(param) for this tensor.
    # The gradient is filled in when `.backward()` is called on the loss and is
    # then readable via `.grad` on the returned tensor.
    return (torch.randn(size) * std).requires_grad_()

# Similar to Pytorch's nn.Linear()
class LinearModel:
    """Minimal linear layer: `y = x @ weights + bias`."""

    def __init__(self, in_features, out_features):
        """Randomly initialise the weights and the bias via init_params()."""
        # Shape: torch.Size([in_features, out_features])
        self.weights = init_params((in_features, out_features))
        # Without a bias, w*x is stuck at 0 whenever x is 0, so the full
        # equation is y = w*x + b. Shape: torch.Size([out_features])
        self.bias = init_params(out_features)

    def parameters(self):
        """Return the trainable tensors as a `(weights, bias)` tuple."""
        return (self.weights, self.bias)

    def __call__(self, xb):
        """Apply the layer to a batch `xb`."""
        weighted = xb @ self.weights  # Matrix multiplication
        return weighted + self.bias

class SimpleLearner:
    """A simple learner to train models.

    Pairs a model with its data loaders and runs a plain gradient-descent
    loop: predict -> loss -> gradient -> step -> reset, once per batch.

    Args:
        data_loaders: Object exposing `.train` and `.valid`, each iterable as
            `(batch_of_images, batch_of_targets)` pairs.
        model: Callable mapping a batch of images to predictions, with a
            `parameters()` method returning its trainable tensors.
    """
    def __init__(self, data_loaders, model):
        self.data_loaders = data_loaders
        self.model = model

    def calculate_gradient(self, image, target):
        """Calculate gradients i.e. slope i.e. `d(loss) / d(weight)` of weights and biases.

        If it's very small, it means we're closer to the optimal value.
        """
        predictions = self.model(image)
        loss = mnist_loss(predictions, target)

        # Updates `.grad` on every tensor in self.model.parameters(), see init_params()
        # It could've been named `.calculate_gradient()` to make life easier.
        loss.backward()

    def step(self, learning_rate):
        """Step function to update the weights and biases.

        If learning rate is too low, it might require a lot of steps to reach the optimal value.
        If learning rate is too high, it might result in loss getting worse, or bounce around in circles.
        """
        for param in self.model.parameters():
            # Work on `.data` so the update itself is not recorded by autograd.
            param.data -= param.grad.data * learning_rate

    def reset_gradient(self):
        """Reset the calculated gradients."""
        # backward() accumulates into `.grad`, so it must be cleared between batches.
        for p in self.model.parameters():
            p.grad = None

    def train_epoch(self, learning_rate):
        """Run one pass over the training set, updating the parameters after every batch."""
        for batch_of_images, batch_of_targets in self.data_loaders.train:
            self.calculate_gradient(batch_of_images, batch_of_targets)
            self.step(learning_rate)
            self.reset_gradient()

    def validate_epoch(self):
        """Return the mean of the per-batch accuracies on the validation set, rounded to 4 places."""
        accuracy = [
            batch_accuracy(self.model(batch_of_images), batch_of_targets)
            for batch_of_images, batch_of_targets in self.data_loaders.valid
        ]

        return round(torch.stack(accuracy).mean().item(), 4)

    def train_model(self, epochs, learning_rate):
        """Train for `epochs` epochs, printing the validation accuracy after each one."""
        for _ in range(epochs):
            self.train_epoch(learning_rate)
            print(self.validate_epoch(), end=' ')

# Fetch the MNIST sample dataset; `path` is the directory it was extracted to
path = untar_data(URLs.MNIST_SAMPLE)
Path.BASE_PATH = path


def _load_digit_images(split, digit):
    """Stack every image under path/split/digit into one float tensor, divided by 255."""
    image_files = (path/split/digit).ls().sorted()
    stacked = torch.stack([tensor(Image.open(image_file)) for image_file in image_files])
    return stacked.float() / 255


def _build_dataset(threes, sevens):
    """Return (images, targets, dataset) with 3s labelled 1 and 7s labelled 0."""
    # Flatten each 28x28 image into a single row of 28*28 values
    images = torch.cat([threes, sevens]).view(-1, 28*28)
    # One target per image, as a column so it lines up with the model output
    targets = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
    return images, targets, list(zip(images, targets))


# Load images into pytorch tensors
train_threes = _load_digit_images('train', '3')
valid_threes = _load_digit_images('valid', '3')
train_sevens = _load_digit_images('train', '7')
valid_sevens = _load_digit_images('valid', '7')

train_images, train_targets, train_dset = _build_dataset(train_threes, train_sevens)
valid_images, valid_targets, valid_dset = _build_dataset(valid_threes, valid_sevens)

# DataLoader builds on top of Dataset, and adds additional functionalities.
# Batch size is a tradeoff between speed vs GPU memory
train_dl = DataLoader(train_dset, batch_size=256)
valid_dl = DataLoader(valid_dset, batch_size=256)

# DataLoaders builds on top of Datasets
data_loaders = DataLoaders(train_dl, valid_dl)

# Using our custom learner
model = LinearModel(28*28, 1)
learner = SimpleLearner(data_loaders, model)
learner.train_model(20, learning_rate=1.0)

## Similar to using PyTorch's nn.Linear with fastai's Learner
# model = nn.Linear(28*28, 1)
# learn = Learner(data_loaders, model, opt_func=SGD, loss_func=mnist_loss, metrics=batch_accuracy)
# learn.fit(20, lr=1.0)
## Plot the recorded learning process with
# plt.plot(L(learn.recorder.values).itemgot(2));
# print("Final accuracy:", learn.recorder.values[-1][2])
class SimpleNet:
    """A simple multi layer neural network: linear -> nonlinearity -> linear."""

    def __init__(self, in_features, out_features):
        """Build the three layers; the hidden width is fixed at 30 activations."""
        # Layer 1: Linear
        # Its 30 output activations let the first layer construct 30 different
        # features, each some different mix of pixels. The number is a free choice
        # that depends on how complex the problem is.
        self.layer1 = LinearModel(in_features, 30)

        # Layer 2: Nonlinearity a.k.a Activation Function
        # Behaves like `F.relu` (Rectified Linear Unit): every negative number
        # is replaced with zero.
        self.layer2 = lambda xb: xb.max(tensor(0.0))

        # Layer 3: Linear
        # Takes 30 input activations so that it matches the output of layer 1.
        self.layer3 = LinearModel(30, out_features)

    def parameters(self):
        """Return the weights and biases of both linear layers as a flat 4-tuple."""
        first_weights, first_bias = self.layer1.parameters()
        last_weights, last_bias = self.layer3.parameters()
        return (first_weights, first_bias, last_weights, last_bias)

    def __call__(self, xb):
        """Pass the batch `xb` through layer 1, the nonlinearity, then layer 3."""
        return self.layer3(self.layer2(self.layer1(xb)))

# Instantiate SimpleNet with 28*28 input features and 1 output feature
model = SimpleNet(28*28, 1)

## Similar to Pytorch's
# model = nn.Sequential(
#     nn.Linear(28*28, 30),  # Layer 1
#     nn.ReLU(),             # Layer 2
#     nn.Linear(30, 1),      # Layer 3
# )
# Chapter 5: Pet Breeds

# Chapter 6: Multicat

