Skip to content

Vision

ImageLoader

Bases: EmbetterBase

Component that can turn filepaths into a list of PIL.Image objects.

Parameters:

Name Type Description Default
convert str

Color conversion setting from the Python image library.

'RGB'
out str

What kind of image output format to expect.

'pil'

Usage

You can use the ImageLoader in standalone fashion.

from embetter.vision import ImageLoader

filepath = "tests/data/thiscatdoesnotexist.jpeg"
ImageLoader(convert="RGB").fit_transform([filepath])

But it's more common to see it as part of a pipeline.

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, ColorHistogramEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    ColorHistogramEncoder()
)

pipe.fit_transform(df)
Source code in embetter/vision/_loader.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class ImageLoader(EmbetterBase):
    """
    Component that can turn filepaths into a list of PIL.Image objects.

    Arguments:
        convert: Color [conversion setting](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert) from the Python image library.
        out: What kind of image output format to expect. Either `"pil"` (a list of `PIL.Image` objects) or `"numpy"` (a numpy array of pixel values).

    **Usage**

    You can use the `ImageLoader` in standalone fashion.

    ```python
    from embetter.vision import ImageLoader

    filepath = "tests/data/thiscatdoesnotexist.jpeg"
    ImageLoader(convert="RGB").fit_transform([filepath])
    ```

    But it's more common to see it as part of a pipeline.

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, ColorHistogramEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        ColorHistogramEncoder()
    )

    pipe.fit_transform(df)
    ```

    """

    def __init__(self, convert: str = "RGB", out: str = "pil") -> None:
        self.convert = convert
        self.out = out

    def _check_out(self) -> None:
        """Raise a `ValueError` when `out` is not one of the supported formats."""
        if self.out not in ["pil", "numpy"]:
            raise ValueError(
                f"Output format parameter out={self.out} must be either pil/numpy."
            )

    def _load(self, path):
        """Open a single image, convert its color mode and release the file handle."""
        # `convert` returns an in-memory copy, so the file opened by
        # `Image.open` can be closed as soon as the conversion is done.
        with Image.open(path) as img:
            return img.convert(self.convert)

    def fit(self, X, y=None):
        """
        No actual "fitting" happens in this method, but it does check the input arguments
        per sklearn convention.

        Raises:
            ValueError: if `out` is neither `"pil"` nor `"numpy"`.
        """
        self._check_out()
        return self

    def transform(self, X, y=None):
        """
        Turn an iterable of file paths into images.

        Returns a list of `PIL.Image` objects when `out="pil"` and a numpy array
        containing the pixel values of every image when `out="numpy"`.

        Raises:
            ValueError: if `out` is neither `"pil"` nor `"numpy"`.
        """
        # Validate here as well: `transform` may be called without `fit`, and an
        # unsupported format would otherwise silently yield `None`.
        self._check_out()
        if self.out == "pil":
            return [self._load(x) for x in X]
        return np.array([np.array(self._load(x)) for x in X])

ColorHistogramEncoder

Bases: EmbetterBase

Encoder that generates an embedding based on the color histogram of the image.

Parameters:

Name Type Description Default
n_buckets

number of buckets per color

256

Usage:

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, ColorHistogramEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    ColorHistogramEncoder()
)

# This pipeline can now encode each image in the dataframe
pipe.fit_transform(df)
Source code in embetter/vision/_colorhist.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class ColorHistogramEncoder(EmbetterBase):
    """
    Encoder that generates an embedding based on the color histogram of the image.

    Arguments:
        n_buckets: number of buckets per color

    **Usage**:

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, ColorHistogramEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        ColorHistogramEncoder()
    )

    # This pipeline can now encode each image in the dataframe
    pipe.fit_transform(df)
    ```
    """

    def __init__(self, n_buckets=256):
        self.n_buckets = n_buckets

    def transform(self, X, y=None):
        """
        Takes a sequence of `PIL.Image` and returns a numpy array representing
        a color histogram for each.

        Every row holds the histogram counts of the first three channels of one
        image, laid out one channel after the other, which gives
        `3 * n_buckets` columns per image.
        """
        histograms = np.zeros((len(X), self.n_buckets * 3))
        # The bucket edges span 0..255 and are the same for every channel and image.
        edges = np.linspace(0, 255, self.n_buckets + 1)
        for row, image in enumerate(X):
            pixels = np.array(image)
            per_channel = []
            for channel in range(3):
                counts, _ = np.histogram(pixels[:, :, channel].ravel(), bins=edges)
                per_channel.append(counts)
            histograms[row, :] = np.concatenate(per_channel)
        return histograms

TimmEncoder

Bases: EmbetterBase

Use a pretrained vision model from TorchVision to generate embeddings. Embeddings are provided via the lovely timm library.

You can find a list of available models here.

Parameters:

Name Type Description Default
name

name of the model to use

'mobilenetv3_large_100'
encode_predictions

output the model's predictions instead of the pooled embedding from the layer before them

False

Usage:

import pandas as pd
from sklearn.pipeline import make_pipeline

from embetter.grab import ColumnGrabber
from embetter.vision import ImageLoader, TimmEncoder

# Let's say we start with a csv file with filepaths
data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
df = pd.DataFrame(data)

# Let's build a pipeline that grabs the column, turns it
# into an image and embeds it.
pipe = make_pipeline(
    ColumnGrabber("filepaths"),
    ImageLoader(),
    TimmEncoder(name="mobilenetv3_large_100")
)

# This pipeline can now encode each image in the dataframe
pipe.fit_transform(df)
Source code in embetter/vision/_torchvis.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class TimmEncoder(EmbetterBase):
    """
    Use a pretrained vision model to generate embeddings. The models
    are provided via the lovely `timm` library.

    You can find a list of available models [here](https://rwightman.github.io/pytorch-image-models/models/).

    Arguments:
        name: name of the model to use
        encode_predictions: output the model's predictions instead of the pooled embedding from the layer before them

    **Usage**:

    ```python
    import pandas as pd
    from sklearn.pipeline import make_pipeline

    from embetter.grab import ColumnGrabber
    from embetter.vision import ImageLoader, TimmEncoder

    # Let's say we start with a csv file with filepaths
    data = {"filepaths":  ["tests/data/thiscatdoesnotexist.jpeg"]}
    df = pd.DataFrame(data)

    # Let's build a pipeline that grabs the column, turns it
    # into an image and embeds it.
    pipe = make_pipeline(
        ColumnGrabber("filepaths"),
        ImageLoader(),
        TimmEncoder(name="mobilenetv3_large_100")
    )

    # This pipeline can now encode each image in the dataframe
    pipe.fit_transform(df)
    ```
    """

    def __init__(self, name="mobilenetv3_large_100", encode_predictions=False):
        self.name = name
        self.encode_predictions = encode_predictions
        # Build the model exactly once; `num_classes=0` is passed only when the
        # embedding (rather than the predictions) is requested.
        if self.encode_predictions:
            self.model = timm.create_model(name, pretrained=True)
        else:
            self.model = timm.create_model(name, pretrained=True, num_classes=0)
        # Inference only: switch layers such as BatchNorm/Dropout to evaluation
        # behaviour so the pretrained statistics are used.
        self.model.eval()
        self.config = resolve_data_config({}, model=self.model)
        self.transform_img = create_transform(**self.config)

    def transform(self, X, y=None):
        """
        Transforms grabbed images into numeric representations.

        Each image is preprocessed with the model's own transform, passed through
        the model as a batch of one, and the outputs are stacked in a numpy array.
        """
        import torch  # local import: only needed here, for `no_grad`

        # No gradients are needed for embedding, so skip autograd bookkeeping.
        with torch.no_grad():
            return np.array(
                [
                    self.model(self.transform_img(x).unsqueeze(0)).squeeze(0).numpy()
                    for x in X
                ]
            )