from hulearn.datasets import *

load_fish(return_X_y=False, as_frame=False)

Show source code in hulearn/datasets.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def load_fish(return_X_y: bool = False, as_frame: bool = False):
    """
    Load the subset of the Fish market dataset that ships with the package. The full dataset lives [here](https://www.kaggle.com/aungpyaeap/fish-market).

    The features are the `Species`, `Length1`, `Length2`, `Length3`, `Height`
    and `Width` columns; the target is the `Weight` column. Without any flags
    the result is a dictionary with a `"data"` and a `"target"` entry.

    Arguments:
        return_X_y: return a tuple of (`X`, `y`) instead of a dictionary
        as_frame: return the whole table as a pandas dataframe; when set, this
            wins over `return_X_y`

    Usage:

    ```python
    from hulearn.datasets import load_fish

    df = load_fish(as_frame=True)
    X, y = load_fish(return_X_y=True)
    ```
    """
    # The archive is looked up relative to the installed `hulearn` package.
    archive_path = resource_filename("hulearn", os.path.join("data", "fish.zip"))
    frame = pd.read_csv(archive_path)
    if as_frame:
        return frame

    feature_columns = ["Species", "Length1", "Length2", "Length3", "Height", "Width"]
    features = frame[feature_columns].values
    target = frame["Weight"].values
    return (features, target) if return_X_y else {"data": features, "target": target}

Loads a subset of the Fish market dataset. You can find the full dataset on Kaggle: https://www.kaggle.com/aungpyaeap/fish-market

Parameters

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `return_X_y` | `bool` | return a tuple of (`X`, `y`) for convenience | `False` |
| `as_frame` | `bool` | return all the data as a pandas dataframe | `False` |

Usage:

from hulearn.datasets import load_fish

df = load_fish(as_frame=True)
X, y = load_fish(return_X_y=True)

load_titanic(return_X_y=False, as_frame=False)

Show source code in hulearn/datasets.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def load_titanic(return_X_y: bool = False, as_frame: bool = False):
    """
    Load the subset of the titanic dataset that ships with the package. The full dataset lives [here](https://www.kaggle.com/c/titanic/data).

    The features are the `pclass`, `name`, `sex`, `age`, `fare`, `sibsp` and
    `parch` columns; the target is the `survived` column. Without any flags
    the result is a dictionary with a `"data"` and a `"target"` entry.

    Arguments:
        return_X_y: return a tuple of (`X`, `y`) instead of a dictionary
        as_frame: return the whole table as a pandas dataframe; when set, this
            wins over `return_X_y`

    Usage:

    ```python
    from hulearn.datasets import load_titanic

    df = load_titanic(as_frame=True)
    X, y = load_titanic(return_X_y=True)
    ```
    """
    # The archive is looked up relative to the installed `hulearn` package.
    archive_path = resource_filename("hulearn", os.path.join("data", "titanic.zip"))
    frame = pd.read_csv(archive_path)
    if as_frame:
        return frame

    feature_columns = ["pclass", "name", "sex", "age", "fare", "sibsp", "parch"]
    features = frame[feature_columns].values
    target = frame["survived"].values
    return (features, target) if return_X_y else {"data": features, "target": target}

Loads a subset of the titanic dataset. You can find the full dataset on Kaggle: https://www.kaggle.com/c/titanic/data

Parameters

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `return_X_y` | `bool` | return a tuple of (`X`, `y`) for convenience | `False` |
| `as_frame` | `bool` | return all the data as a pandas dataframe | `False` |

Usage:

from hulearn.datasets import load_titanic

df = load_titanic(as_frame=True)
X, y = load_titanic(return_X_y=True)