ColorHistogramEncoder

Encoder that generates an embedding based on the color histogram of the image.

Parameters

Name Type Description Default
n_buckets number of buckets per color 256

Usage:

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, ColorHistogramEncoder

# Let's say we start with a dataframe that has a column of image filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the "filepaths" column, loads each
# path as an image and embeds it as a color histogram.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    ColorHistogramEncoder()
)

# This pipeline can now encode each image in the dataframe
pipe.fit_transform(df)

transform(self, X, y=None)

Show source code in vision/_colorhist.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
    def transform(self, X, y=None):
        """
        Turn a sequence of images into color-histogram embeddings.

        Each item of `X` is converted with `np.array` and is expected to be
        indexable as (height, width, channel) with at least three channels
        (presumably R, G, B -- confirm against the image loader). Only the
        first three channels are used; any further channel is ignored.

        Arguments:
            X: sized sequence of `PIL.Image` objects (or array-likes of the
                same layout).
            y: unused, present for scikit-learn API compatibility.

        Returns:
            A float numpy array of shape `(len(X), self.n_buckets * 3)`. Row
            `i` holds the per-channel pixel counts of image `i`, channel 0
            first, then channel 1, then channel 2.
        """
        embeddings = np.zeros((len(X), self.n_buckets * 3))
        # The bin edges are identical for every channel of every image, so
        # they are computed once: n_buckets equal-width bins over [0, 255].
        edges = np.linspace(0, 255, self.n_buckets + 1)
        for row, image in enumerate(X):
            pixels = np.array(image)
            channel_counts = [
                np.histogram(pixels[:, :, channel].flatten(), bins=edges)[0]
                for channel in range(3)
            ]
            embeddings[row, :] = np.concatenate(channel_counts)
        return embeddings

Takes a sequence of PIL.Image and returns a numpy array representing a color histogram for each.