Vision

ImageLoader

Bases: EmbetterBase

Component that can turn filepaths into a list of PIL.Image objects.

Parameters:

Name	Type	Description	Default
`convert`	`str`	Color conversion setting from the Python image library.	`'RGB'`
`out`	`str`	What kind of image output format to expect.	`'pil'`

Usage

You can use the ImageLoader in standalone fashion.

from embetter.vision import ImageLoader

filepath = "tests/data/thiscatdoesnotexist.jpeg"
ImageLoader(convert="RGB").fit_transform([filepath])

But it's more common to see it used as part of a pipeline.

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, ColorHistogramEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    ColorHistogramEncoder()
)

pipe.fit_transform(df)

Source code in embetter/vision/_loader.py

class ImageLoader(EmbetterBase):
    """
    Component that can turn filepaths into loaded images.

    Arguments:
        convert: Color [conversion setting](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert) from the Python image library.
        out: What kind of image output format to expect. Use `"pil"` to get a
            list of `PIL.Image` objects or `"numpy"` to get a numpy array of
            pixel values.

    **Usage**

    You can use the `ImageLoader` in standalone fashion.

    ```python
    from embetter.vision import ImageLoader

    filepath = "tests/data/thiscatdoesnotexist.jpeg"
    ImageLoader(convert="RGB").fit_transform([filepath])
    ```

    But it's more common to see it used as part of a pipeline.

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, ColorHistogramEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        ColorHistogramEncoder()
    )

    pipe.fit_transform(df)
    ```

    """

    def __init__(self, convert: str = "RGB", out: str = "pil") -> None:
        self.convert = convert
        self.out = out

    def _validate_out(self) -> None:
        """Raise a `ValueError` when `out` is not one of the supported formats."""
        if self.out not in ["pil", "numpy"]:
            raise ValueError(
                f"Output format parameter out={self.out} must be either pil/numpy."
            )

    def _load(self, path):
        """Open a single image and return it converted to the `convert` mode."""
        # `convert` hands back a new image, so the handle opened by
        # `Image.open` can be released as soon as the conversion is done.
        with Image.open(path) as img:
            return img.convert(self.convert)

    def fit(self, X, y=None):
        """
        No actual "fitting" happens in this method, but it does check the input arguments
        per sklearn convention.

        Raises:
            ValueError: if `out` is neither `"pil"` nor `"numpy"`.
        """
        self._validate_out()
        return self

    def transform(self, X, y=None):
        """
        Load every file path in `X` as an image.

        Returns a list of `PIL.Image` objects when `out="pil"` and a numpy
        array containing the pixel values when `out="numpy"`.

        Raises:
            ValueError: if `out` is neither `"pil"` nor `"numpy"`.
        """
        # Validate here as well: `transform` can be called without `fit`, and
        # an unsupported `out` would otherwise silently produce `None`.
        self._validate_out()
        if self.out == "numpy":
            return np.array([np.array(self._load(x)) for x in X])
        return [self._load(x) for x in X]

ColorHistogramEncoder

Bases: EmbetterBase

Encoder that generates an embedding based on the color histogram of the image.

Parameters:

Name	Type	Description	Default
`n_buckets`		number of buckets per color	`256`

Usage:

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, ColorHistogramEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    ColorHistogramEncoder()
)

# This pipeline can now encode each image in the dataframe
pipe.fit_transform(df)

Source code in embetter/vision/_colorhist.py

class ColorHistogramEncoder(EmbetterBase):
    """
    Encoder that embeds an image by means of its color histogram.

    Each of the first three color channels is binned separately and the
    three histograms are concatenated into a single feature vector.

    Arguments:
        n_buckets: number of buckets per color

    **Usage**:

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, ColorHistogramEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        ColorHistogramEncoder()
    )

    # This pipeline can now encode each image in the dataframe
    pipe.fit_transform(df)
    ```
    """

    def __init__(self, n_buckets=256):
        self.n_buckets = n_buckets

    def transform(self, X, y=None):
        """
        Takes a sequence of `PIL.Image` and returns a numpy array with one row
        per image, holding `n_buckets` histogram counts for each of the first
        three color channels.
        """
        features = np.zeros((len(X), self.n_buckets * 3))
        # The bin edges are identical for every channel of every image.
        edges = np.linspace(0, 255, self.n_buckets + 1)
        for row, image in enumerate(X):
            pixels = np.array(image)
            per_channel = [
                np.histogram(pixels[:, :, channel].flatten(), bins=edges)[0]
                for channel in range(3)
            ]
            features[row, :] = np.concatenate(per_channel)
        return features

TimmEncoder

Bases: EmbetterBase

Use a pretrained vision model from TorchVision to generate embeddings. Embeddings are provided via the lovely timm library.

You can find a list of available models here.

Parameters:

Name	Type	Description	Default
`name`		name of the model to use	`'mobilenetv3_large_100'`
`encode_predictions`		output the model's predictions instead of the pooled embedding layer that precedes them	`False`

Usage:

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, TimmEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    TimmEncoder(name="mobilenetv3_large_100")
)

# This pipeline can now encode each image in the dataframe
pipe.fit_transform(df)

Source code in embetter/vision/_torchvis.py

class TimmEncoder(EmbetterBase):
    """
    Use a pretrained vision model from TorchVision to generate embeddings. Embeddings
    are provided via the lovely `timm` library.

    You can find a list of available models [here](https://rwightman.github.io/pytorch-image-models/models/).

    Arguments:
        name: name of the model to use
        encode_predictions: output the model's predictions instead of the pooled
            embedding layer that precedes them

    **Usage**:

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, TimmEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        TimmEncoder(name="mobilenetv3_large_100")
    )

    # This pipeline can now encode each image in the dataframe
    pipe.fit_transform(df)
    ```
    """

    def __init__(self, name="mobilenetv3_large_100", encode_predictions=False):
        self.name = name
        self.encode_predictions = encode_predictions
        # Build the pretrained model exactly once. `num_classes=0` is only
        # passed when the pooled embedding is wanted rather than predictions.
        model_kwargs = {} if self.encode_predictions else {"num_classes": 0}
        self.model = timm.create_model(name, pretrained=True, **model_kwargs)
        # The model is only used for inference, so switch it to eval mode
        # rather than leaving it in PyTorch's default training mode.
        self.model.eval()
        self.config = resolve_data_config({}, model=self.model)
        self.transform_img = create_transform(**self.config)

    def transform(self, X, y=None):
        """
        Transforms grabbed images into numeric representations.

        Every image in `X` is preprocessed with `transform_img`, passed through
        the model on its own and the outputs are collected in one numpy array.
        """
        # `unsqueeze(0)` adds the batch dimension before the model call;
        # `squeeze(0)` removes it again from the model output.
        batch = [self.transform_img(x).unsqueeze(0) for x in X]
        return np.array([self.model(x).squeeze(0).detach().numpy() for x in batch])