ContrastiveFinetuner

embetter.finetune.ContrastiveFinetuner

Runs a contrastive network to finetune the embeddings towards a class: pairs of examples that share a label are pushed closer together, while pairs with different labels are pushed apart.

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `n_neg` | `int` | number of negative example pairs to sample per positive item | `3` |
| `n_epochs` | `int` | number of epochs to use for training | `20` |
| `learning_rate` | `float` | learning rate of the contrastive network | `0.001` |
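A minimal usage sketch; the random array below stands in for precomputed embeddings (e.g. sentence embeddings computed elsewhere):

    import numpy as np

    from embetter.finetune import ContrastiveFinetuner

    # Toy stand-ins for real embeddings and their class labels.
    X = np.random.rand(100, 64).astype(np.float32)
    y = np.array([0, 1] * 50)

    tuner = ContrastiveFinetuner(n_neg=3, n_epochs=20, learning_rate=0.001)
    X_finetuned = tuner.fit(X, y).transform(X)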

fit(self, X, y)

Source: `finetune/_contrastive.py`

    def fit(self, X, y):
        """Fits the finetuner."""
        return self.partial_fit(X, y, classes=np.unique(y))

Fits the finetuner.
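Because `fit` simply delegates to `partial_fit` with `classes=np.unique(y)`, a single call is enough when all data is available up front:

    tuner = ContrastiveFinetuner()
    tuner.fit(X, y)  # equivalent to tuner.partial_fit(X, y, classes=np.unique(y))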

generate_batch(self, X_torch, y)

Source: `finetune/_contrastive.py`

    def generate_batch(self, X_torch, y):
        """Generate a batch of pytorch pairs used for finetuning"""
        pairs = generate_pairs_batch(y, n_neg=self.n_neg)
        X1 = torch.zeros(len(pairs), X_torch.shape[1])
        X2 = torch.zeros(len(pairs), X_torch.shape[1])
        labels = torch.tensor([ex.label for ex in pairs], dtype=torch.long)
        for i, pair in enumerate(pairs):
            X1[i] = X_torch[pair.i1]
            X2[i] = X_torch[pair.i2]
        return X1, X2, labels

Generates a batch of PyTorch pairs used for finetuning.
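`generate_pairs_batch` and the `Pair` objects it returns are internal to embetter and not shown here. The sketch below is a hypothetical simplification, not the library's implementation: each item is paired with one same-class example (label 1) and with `n_neg` different-class examples (label 0).

    import random
    from collections import namedtuple

    # Hypothetical stand-in for the internal pair type: two row indices and a 0/1 label.
    Pair = namedtuple("Pair", ["i1", "i2", "label"])

    def sketch_generate_pairs(y, n_neg=3):
        """Illustrative only: one positive pair per item plus n_neg negatives."""
        by_class = {}
        for i, label in enumerate(y):
            by_class.setdefault(label, []).append(i)
        pairs = []
        for i, label in enumerate(y):
            pairs.append(Pair(i, random.choice(by_class[label]), 1))
            negatives = [j for j, other in enumerate(y) if other != label]
            for j in random.sample(negatives, min(n_neg, len(negatives))):
                pairs.append(Pair(i, j, 0))
        return pairs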

partial_fit(self, X, y, classes=None)

Source: `finetune/_contrastive.py`

    def partial_fit(self, X, y, classes=None):
        """Fits the finetuner using the partial_fit API."""
        if not hasattr(self, "_classes"):
            if classes is None:
                raise ValueError("`classes` must be provided for partial_fit")
            self._classes = classes
        # Create a model if it does not exist yet.
        if not hasattr(self, "_model"):
            self._model = ContrastiveNetwork(shape_in=X.shape[1])
            self._optimizer = torch.optim.Adam(
                self._model.parameters(), lr=self.learning_rate
            )
            self._criterion = nn.CrossEntropyLoss()

        X_torch = torch.from_numpy(X).detach().float()

        for epoch in range(self.n_epochs):  # loop over the dataset multiple times
            X1, X2, out = self.generate_batch(X_torch, y=y)

            # zero the parameter gradients
            self._optimizer.zero_grad()

            # forward + backward + optimize
            outputs = self._model(X1, X2)
            loss = self._criterion(outputs, out)
            loss.backward()
            self._optimizer.step()

        return self

Fits the finetuner using the partial_fit API.
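When data arrives in batches, `partial_fit` can be called repeatedly on the same finetuner. `classes` is required on the first call and, following the usual scikit-learn `partial_fit` convention, should list every label that can appear later. A sketch, assuming a hypothetical `batch_iterator` that yields `(X, y)` chunks:

    tuner = ContrastiveFinetuner(n_epochs=5)
    for X_batch, y_batch in batch_iterator:  # hypothetical source of (X, y) batches
        tuner.partial_fit(X_batch, y_batch, classes=[0, 1])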

transform(self, X, y=None)

Source: `finetune/_contrastive.py`

    def transform(self, X, y=None):
        """Transforms the data according to the sklearn api by using the hidden layer."""
        Xt = torch.from_numpy(X).float().detach()
        return self._model.embed(Xt).detach().numpy()

Transforms the data according to the scikit-learn API by using the network's hidden layer.
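Because `transform` follows the scikit-learn API, the finetuner composes with downstream estimators. A sketch (reusing `X` and `y` from the earlier example) with a classifier stacked on top:

    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import make_pipeline

    from embetter.finetune import ContrastiveFinetuner

    # The classifier is trained on the finetuned embeddings.
    pipe = make_pipeline(ContrastiveFinetuner(), LogisticRegression())
    pipe.fit(X, y)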