model.py

import torch
import torch.nn as nn
import torch.nn.functional as F

from features.custom_features import custom_features
from features.densenet_features import densenet121_features, densenet161_features, \
    densenet169_features, densenet201_features
from features.resnet_features import resnet18_features, resnet34_features, \
    resnet50_features, \
    resnet101_features, resnet152_features
from features.vgg_features import vgg11_features, vgg11_bn_features, vgg13_features, \
    vgg13_bn_features, vgg16_features, vgg16_bn_features, \
    vgg19_features, vgg19_bn_features
from receptive_field import compute_proto_layer_rf_info_v2

base_architecture_to_features = {'resnet18': resnet18_features,
                                 'resnet34': resnet34_features,
                                 'resnet50': resnet50_features,
                                 'resnet101': resnet101_features,
                                 'resnet152': resnet152_features,
                                 'densenet121': densenet121_features,
                                 'densenet161': densenet161_features,
                                 'densenet169': densenet169_features,
                                 'densenet201': densenet201_features,
                                 'vgg11': vgg11_features,
                                 'vgg11_bn': vgg11_bn_features,
                                 'vgg13': vgg13_features,
                                 'vgg13_bn': vgg13_bn_features,
                                 'vgg16': vgg16_features,
                                 'vgg16_bn': vgg16_bn_features,
                                 'vgg19': vgg19_features,
                                 'vgg19_bn': vgg19_bn_features,
                                 'custom': custom_features}

cache_embeddings = {}


class PPNet(nn.Module):

    def __init__(self, features, img_size, prototype_shape,
                 proto_layer_rf_info, num_classes, topk_k, init_weights=True,
                 prototype_activation_function='log',
                 add_on_layers_type='bottleneck', hard_constraint_image_dir=None):

        super(PPNet, self).__init__()
        self.hard_constraint_image_dir = hard_constraint_image_dir
        self.img_size = img_size
        self.prototype_shape = prototype_shape
        self.num_prototypes = prototype_shape[0]
        self.num_classes = num_classes
        self.topk_k = topk_k

        self.epsilon = 1e-4

        # prototype_activation_function could be 'log', 'linear',
        # or a generic function that converts distance to similarity score
        self.prototype_activation_function = prototype_activation_function

        '''
        Here we are initializing the class identities of the prototypes
        Without domain specific knowledge we allocate the same number of
        prototypes for each class
        '''
        assert (self.num_prototypes % self.num_classes == 0)
        # a onehot indication matrix for each prototype's class identity
        self.prototype_class_identity = torch.zeros(self.num_prototypes,
                                                    self.num_classes)

        num_prototypes_per_class = self.num_prototypes // self.num_classes
        for j in range(self.num_prototypes):
            self.prototype_class_identity[j, j // num_prototypes_per_class] = 1

        self.proto_layer_rf_info = proto_layer_rf_info

        # this has to be named features to allow the precise loading
        self.features = features

        features_name = str(self.features).upper()
        if features_name.startswith('VGG') or features_name.startswith(
                'RES') or features_name.startswith('CUSTOM'):
            first_add_on_layer_in_channels = \
                [i for i in features.modules() if isinstance(i, nn.Conv2d)][
                    -1].out_channels
        elif features_name.startswith('DENSE'):
            first_add_on_layer_in_channels = \
                [i for i in features.modules() if isinstance(i, nn.BatchNorm2d)][
                    -1].num_features
        else:
            raise Exception('other base base_architecture NOT implemented')

        if add_on_layers_type == 'bottleneck':
            add_on_layers = []
            current_in_channels = first_add_on_layer_in_channels
            while (current_in_channels > self.prototype_shape[1]) or (
                    len(add_on_layers) == 0):
                current_out_channels = max(self.prototype_shape[1],
                                           (current_in_channels // 2))
                add_on_layers.append(nn.Conv2d(in_channels=current_in_channels,
                                               out_channels=current_out_channels,
                                               kernel_size=1))
                add_on_layers.append(nn.ReLU())
                add_on_layers.append(nn.Conv2d(in_channels=current_out_channels,
                                               out_channels=current_out_channels,
                                               kernel_size=1))
                if current_out_channels > self.prototype_shape[1]:
                    add_on_layers.append(nn.ReLU())
                else:
                    assert (current_out_channels == self.prototype_shape[1])
                    add_on_layers.append(nn.Sigmoid())
                current_in_channels = current_in_channels // 2
            self.add_on_layers = nn.Sequential(*add_on_layers)
        else:
            self.add_on_layers = nn.Sequential(
                nn.Conv2d(in_channels=first_add_on_layer_in_channels,
                          out_channels=self.prototype_shape[1], kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=self.prototype_shape[1],
                          out_channels=self.prototype_shape[1], kernel_size=1),
                nn.Sigmoid()
            )

        self.prototype_vectors = nn.Parameter(torch.rand(self.prototype_shape),
                                              requires_grad=True)

        # do not make this just a tensor,
        # since it will not be moved automatically to gpu
        self.ones = nn.Parameter(torch.ones(self.prototype_shape),
                                 requires_grad=False)

        self.last_layer = nn.Linear(self.num_prototypes, self.num_classes,
                                    bias=False)  # do not use bias

        if init_weights:
            self._initialize_weights()

        # for loss att
        self.attribution_map = torch.zeros(self.num_classes, self.num_prototypes,
                                           requires_grad=False)

        # for loss aggr
        self.irrelevant_prototypes_vector = None
        self.irrelevant_prototypes_class = list()

    def add_irrelevant_prototype(self, prototype_number, class_number):
        with torch.no_grad():
            self._validate_prototype_id(class_number)
            self._validate_prototype_id(prototype_number)
            self.attribution_map[class_number, prototype_number] = 1

    def add_class_with_confounder(self, class_number):
        with torch.no_grad():
            self._validate_class_id(class_number)
            if class_number not in self.irrelevant_prototypes_class:
                self.irrelevant_prototypes_class.append(class_number)

    def add_irrelevant_concept(self, prototype_number: int, class_number: int):
        with torch.no_grad():
            self._validate_prototype_id(prototype_number)
            self._validate_class_id(class_number)
            raise NotImplementedError('check if prototype has been already added')
            proto = self.prototype_vectors[prototype_number, ...].detach().clone()
            if self.irrelevant_prototypes_vector is None:
                self.irrelevant_prototypes_vector = proto[None, ...]
            else:
                self.irrelevant_prototypes_vector = torch.cat(
                    [self.irrelevant_prototypes_vector, proto[None, ...]], dim=0)
            self.irrelevant_prototypes_class.append(class_number)
            self.irrelevant_prototypes_vector.requires_grad = False

            # re-initialize prototypes of the class with confounder
            self._re_initialize_prototype_of_class(class_number)

    def _validate_class_id(self, cl_number):
        if cl_number >= self.num_classes or cl_number < 0:
            raise ValueError('Invalid class id')

    def _validate_prototype_id(self, proto_id):
        if proto_id >= self.prototype_shape[0] or proto_id < 0:
            raise ValueError('Invalid prototype id')

    def _re_initialize_prototype_of_class(self, class_number):
        prototype_banned_class = torch.flatten(
            torch.nonzero(self.prototype_class_identity[:, class_number]))
        for p in prototype_banned_class:
            self.re_initialize_prototype_by_id(p)

    def re_initialize_prototype_by_id(self, proto_idx):
        with torch.no_grad():
            state_dict = self.state_dict()
            state_dict['prototype_vectors'][proto_idx] = torch.rand_like(
                state_dict['prototype_vectors'][proto_idx])
            self.load_state_dict(state_dict)

    def conv_features(self, x):
        """
        the feature input to prototype layer
        """
        fe = self.features(x)
        x = self.add_on_layers(fe)
        return x

    @staticmethod
    def _weighted_l2_convolution(input, filter, weights):
        """
        input of shape N * c * h * w
        filter of shape P * c * h1 * w1
        weight of shape P * c * h1 * w1
        """
        input2 = input ** 2
        input_patch_weighted_norm2 = F.conv2d(input=input2, weight=weights)

        filter2 = filter ** 2
        weighted_filter2 = filter2 * weights
        filter_weighted_norm2 = torch.sum(weighted_filter2, dim=(1, 2, 3))
        filter_weighted_norm2_reshape = filter_weighted_norm2.view(-1, 1, 1)

        weighted_filter = filter * weights
        weighted_inner_product = F.conv2d(input=input, weight=weighted_filter)

        # use broadcast
        intermediate_result = \
            - 2 * weighted_inner_product + filter_weighted_norm2_reshape
        # x2_patch_sum and intermediate_result are of the same shape
        distances = F.relu(input_patch_weighted_norm2 + intermediate_result)

        return distances

    def _l2_convolution(self, x):
        """
        apply self.prototype_vectors as l2-convolution filters on input x
        """
        x2 = x ** 2
        x2_patch_sum = F.conv2d(input=x2, weight=self.ones)

        p2 = self.prototype_vectors ** 2
        p2 = torch.sum(p2, dim=(1, 2, 3))
        # p2 is a vector of shape (num_prototypes,)
        # then we reshape it to (num_prototypes, 1, 1)
        p2_reshape = p2.view(-1, 1, 1)

        xp = F.conv2d(input=x, weight=self.prototype_vectors)
        intermediate_result = - 2 * xp + p2_reshape  # use broadcast
        # x2_patch_sum and intermediate_result are of the same shape
        distances = F.relu(x2_patch_sum + intermediate_result)

        return distances

    def _l2_convolution_on_irrelevant_concepts(self, x, proto_vectors):
        ones = torch.ones(proto_vectors.size())
        x2 = x ** 2
        x2_patch_sum = F.conv2d(input=x2, weight=ones)

        p2 = proto_vectors ** 2
        p2 = torch.sum(p2, dim=(1, 2, 3))
        # p2 is a vector of shape (num_prototypes,)
        # then we reshape it to (num_prototypes, 1, 1)
        p2_reshape = p2.view(-1, 1, 1)

        xp = F.conv2d(input=x, weight=proto_vectors)
        intermediate_result = - 2 * xp + p2_reshape  # use broadcast
        # x2_patch_sum and intermediate_result are of the same shape
        distances = F.relu(x2_patch_sum + intermediate_result)

        return distances

    def prototype_distances(self, x):
        """
        x is the raw input
        """
        conv_features = self.conv_features(x)
        distances = self._l2_convolution(conv_features)
        return distances

    def irrelevant_concept_distances(self, x):
        """
        x is the raw input
        """
        conv_features = self.conv_features(x)
        distances = self._l2_convolution_on_irrelevant_concepts(conv_features,
                                                                self.irrelevant_prototypes_vector)
        return distances

    def distance_2_similarity(self, distances, epsilon: float =None):
        epsilon = self.epsilon if epsilon is None else epsilon
        if self.prototype_activation_function == 'log':
            return torch.log((distances + 1) / (distances + epsilon))
        elif self.prototype_activation_function == 'linear':
            return -distances
        else:
            return self.prototype_activation_function(distances)

    def forward(self, x):
        distances = self.prototype_distances(x)

        if 0 < self.topk_k < 1:
            self.topk_k = int(self.topk_k * (distances.shape[2] * distances.shape[3]))
        else:
            self.topk_k = int(self.topk_k)
        # top-k avg
        _distances = distances.view(distances.shape[0], distances.shape[1], -1)
        top_k_neg_distances, _ = torch.topk(-_distances, self.topk_k)
        closest_k_distances = - top_k_neg_distances
        min_distances = F\
            .avg_pool1d(closest_k_distances, kernel_size=closest_k_distances.shape[2]) \
            .view(-1, self.num_prototypes)

        prototype_activations = self.distance_2_similarity(distances)
        _activations = prototype_activations.view(prototype_activations.shape[0],
                                                  prototype_activations.shape[1], -1)
        top_k_activations, _ = torch.topk(_activations, self.topk_k)
        prototype_activations = F\
            .avg_pool1d(top_k_activations, kernel_size=top_k_activations.shape[2]) \
            .view(-1, self.num_prototypes)

        logits = self.last_layer(prototype_activations)

        return logits, min_distances, distances

    def push_forward(self, x):
        """this method is needed for the pushing operation"""
        conv_output = self.conv_features(x)
        distances = self._l2_convolution(conv_output)
        return conv_output, distances

    def prune_prototypes(self, prototypes_to_prune):
        """
        prototypes_to_prune: a list of indices each in
        [0, current number of prototypes - 1] that indicates the prototypes to
        be removed
        """
        prototypes_to_keep = list(
            set(range(self.num_prototypes)) - set(prototypes_to_prune))

        self.prototype_vectors = nn.Parameter(
            self.prototype_vectors.data[prototypes_to_keep, ...],
            requires_grad=True)

        self.prototype_shape = list(self.prototype_vectors.size())
        self.num_prototypes = self.prototype_shape[0]

        # changing self.last_layer in place
        # changing in_features and out_features make sure the numbers are consistent
        self.last_layer.in_features = self.num_prototypes
        self.last_layer.out_features = self.num_classes
        self.last_layer.weight.data = self.last_layer.weight.data[:, prototypes_to_keep]

        # self.ones is nn.Parameter
        self.ones = nn.Parameter(self.ones.data[prototypes_to_keep, ...],
                                 requires_grad=False)
        # self.prototype_class_identity is torch tensor
        # so it does not need .data access for value update
        self.prototype_class_identity = self.prototype_class_identity[
                                        prototypes_to_keep, :]

    def __repr__(self):
        # PPNet(self, features, img_size, prototype_shape,
        # proto_layer_rf_info, num_classes, init_weights=True):
        rep = (
            'PPNet(\n'
            '\tfeatures: {},\n'
            '\timg_size: {},\n'
            '\tprototype_shape: {},\n'
            '\tproto_layer_rf_info: {},\n'
            '\tnum_classes: {},\n'
            '\tepsilon: {},\n'
            '\tclasses_to_fix: {}\n'
            ')'
        )

        return rep.format(self.features,
                          self.img_size,
                          self.prototype_shape,
                          self.proto_layer_rf_info,
                          self.num_classes,
                          self.epsilon,
                          self.irrelevant_prototypes_class)

    def set_last_layer_incorrect_connection(self, incorrect_strength):
        """
        the incorrect strength will be actual strength if -0.5 then input -0.5
        """
        positive_one_weights_locations = torch.t(self.prototype_class_identity)
        negative_one_weights_locations = 1 - positive_one_weights_locations

        correct_class_connection = 1
        incorrect_class_connection = incorrect_strength
        self.last_layer.weight.data.copy_(
            correct_class_connection * positive_one_weights_locations
            + incorrect_class_connection * negative_one_weights_locations)

    def _initialize_weights(self):
        for m in self.add_on_layers.modules():
            if isinstance(m, nn.Conv2d):
                # every init technique has an underscore _ in the name
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        self.set_last_layer_incorrect_connection(incorrect_strength=-0.5)


def construct_PPNet(base_architecture, pretrained, img_size, prototype_shape,
                    num_classes, topk_k, prototype_activation_function='log',
                    add_on_layers_type='bottleneck',
                    hard_constraint_image_dir=None) -> PPNet:
    features = base_architecture_to_features[base_architecture](pretrained=pretrained)
    layer_filter_sizes, layer_strides, layer_paddings = features.conv_info()
    proto_layer_rf_info = compute_proto_layer_rf_info_v2(img_size=img_size,
                                                         layer_filter_sizes=layer_filter_sizes,
                                                         layer_strides=layer_strides,
                                                         layer_paddings=layer_paddings,
                                                         prototype_kernel_size=
                                                         prototype_shape[2])
    return PPNet(features=features,
                 img_size=img_size,
                 prototype_shape=prototype_shape,
                 proto_layer_rf_info=proto_layer_rf_info,
                 num_classes=num_classes,
                 topk_k=topk_k,
                 init_weights=True,
                 prototype_activation_function=prototype_activation_function,
                 add_on_layers_type=add_on_layers_type,
                 hard_constraint_image_dir=hard_constraint_image_dir)