Dataset

Classes that simplify imports from fedbiomed.common.dataset

Classes

CustomDataset

Bases: Dataset

A class representing a custom dataset.

This class allows users to create and manage their own datasets for use in federated learning scenarios.

Functions

complete_initialization
complete_initialization(controller_kwargs, to_format)

Finalize initialization of object to be able to recover items

Parameters:

Name Type Description Default
controller_kwargs Dict[str, Any]

arguments for the dataset; must contain the key 'root' giving the path to dataset

required
to_format DataReturnFormat

format associated to expected return format

required
Source code in fedbiomed/common/dataset/_custom_dataset.py
def complete_initialization(
    self, controller_kwargs: Dict[str, Any], to_format: DataReturnFormat
) -> None:
    """Finalize initialization of object to be able to recover items

    Stores the dataset location and the requested return format, calls the
    user-defined `read` method, then probes the first sample twice: once
    through `get_item` (raw user implementation) and once through `self[0]`.

    Args:
        controller_kwargs: arguments for the dataset; must contain the key
            `"root"` giving the path to the dataset
        to_format: format associated to expected return format

    Raises:
        FedbiomedError: if `"root"` is missing (or `None`) in
            `controller_kwargs`, if `read` or `get_item` raises, if
            `get_item` does not return a tuple of two elements, or if
            indexing the dataset with `self[0]` raises
    """

    self.path = controller_kwargs.get("root")
    if self.path is None:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Custom Dataset ERROR: 'root' must be provided in controller_kwargs to specify dataset location."
        )
    self._to_format = to_format

    # Call user defined read function to read the dataset
    try:
        self.read()
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to read "
            f"from dataset using read method. Please see error: {e}"
        ) from e

    # Probe the user-defined accessor directly to validate its return shape
    try:
        sample = self.get_item(0)
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to retrieve item "
            f"from dataset using get_item method. Please see error: {e}"
        ) from e
    if not isinstance(sample, tuple) or len(sample) != 2:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: get_item method must return a tuple of two elements"
            f" (data, target), but got {type(sample).__name__} with"
            f" length {len(sample) if isinstance(sample, (list, tuple)) else 'N/A'}"
        )

    # Following line is just to check that dataset is well implemented
    # and that it returns the correct data type with respect to to_format.
    # The result itself is not needed, only the absence of an exception.
    try:
        _ = self[0]
    except Exception as e:
        # Report `__getitem__` here: `get_item` already succeeded above, so
        # blaming it again would point the user at the wrong method.
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to retrieve item "
            f"from dataset using __getitem__ method. Please see error: {e}"
        ) from e
get_item abstractmethod
get_item(index)

Retrieves a sample and its label by index.

Parameters:

Name Type Description Default
index int

The index of the sample to retrieve.

required
Source code in fedbiomed/common/dataset/_custom_dataset.py
@abstractmethod
def get_item(self, index):
    """Return the sample stored at position `index` together with its label.

    Abstract: the base implementation is empty and must be overridden.

    Args:
        index (int): Position of the sample to fetch.
    """
read abstractmethod
read()

Reads the dataset from the specified path.

This method should be implemented by subclasses to load the dataset from the given path and prepare it for use.

Source code in fedbiomed/common/dataset/_custom_dataset.py
@abstractmethod
def read(self) -> None:
    """Load the dataset from the configured path.

    Abstract: the base implementation is empty. Subclasses implement this
    to load the dataset from the given path and prepare it for use.
    """

Dataset

Bases: ABC

Attributes

target_transform class-attribute instance-attribute
target_transform = None
to_format property writable
to_format
transform class-attribute instance-attribute
transform = None

Functions

apply_transforms
apply_transforms(sample)

Apply transforms to sample in place

Parameters:

Name Type Description Default
sample Dict[str, Any]

sample returned by self._controller.get_sample

required

Raises:

Type Description
FedbiomedError

if there is a problem applying transform or target_transform

Source code in fedbiomed/common/dataset/_dataset.py
def apply_transforms(self, sample: Dict[str, Any]) -> Dict[str, Any]:
    """Apply transforms to sample in place and return it

    For each of the `data` and `target` entries, the value is passed through
    the format conversion callable, then the default types callable, then the
    matching transform, and finally through the default types callable again.

    Args:
        sample: sample returned by `self._controller.get_sample`

    Returns:
        The same `sample` dict, with `data` and `target` replaced by their
        transformed values

    Raises:
        FedbiomedError: if there is a problem applying `transform` or `target_transform`
    """
    # (key in sample, attribute holding the transform, name used in messages)
    steps = (
        ("data", "_transform", "transform"),
        ("target", "_target_transform", "target_transform"),
    )

    for key, transform_attr, transform_name in steps:
        # Attribute and callable lookups stay inside the `try` so that any
        # failure is reported as a FedbiomedError, exactly as before.
        try:
            sample[key] = getattr(self, transform_attr)(
                self._get_default_types_callable()(
                    self._get_format_conversion_callable()(sample[key])
                )
            )
        except Exception as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to apply `{transform_name}` to `{key}` "
                f"in sample in {self._to_format.value} format."
            ) from e

        # Default types are re-applied on the transform output
        try:
            sample[key] = self._get_default_types_callable()(sample[key])
        except Exception as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to apply default training plan types to `{key}` "
                f"in sample in {self._to_format.value} format."
            ) from e

    return sample
complete_initialization abstractmethod
complete_initialization()

Finalize initialization of object to be able to recover items

Source code in fedbiomed/common/dataset/_dataset.py
@abstractmethod
def complete_initialization(self) -> None:
    """Finish setting up the object so that items can be recovered.

    Abstract: the base implementation is empty. Implementations are meant
    to recover a sample and validate consistency of transforms.
    """

ImageFolderDataset

ImageFolderDataset(transform=None, target_transform=None)

Bases: _ImageLabelDataset

Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Store the validated data and target transforms.

    Args:
        transform: optional callable checked with `_validate_transform`
            and stored as `_transform`
        target_transform: optional callable checked with
            `_validate_transform` and stored as `_target_transform`

    Raises:
        FedbiomedValueError: if the object being built is exactly an
            `_ImageLabelDataset` rather than one of its subclasses
    """
    # Only subclasses may be instantiated; the exact base type is rejected
    concrete_type = type(self)
    if concrete_type is _ImageLabelDataset:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )

    checked_transform = self._validate_transform(transform)
    self._transform = checked_transform

    checked_target_transform = self._validate_transform(target_transform)
    self._target_transform = checked_target_transform

MedNistDataset

MedNistDataset(transform=None, target_transform=None)

Bases: _ImageLabelDataset

Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Store the validated data and target transforms.

    Args:
        transform: optional callable checked with `_validate_transform`
            and stored as `_transform`
        target_transform: optional callable checked with
            `_validate_transform` and stored as `_target_transform`

    Raises:
        FedbiomedValueError: if the object being built is exactly an
            `_ImageLabelDataset` rather than one of its subclasses
    """
    # Only subclasses may be instantiated; the exact base type is rejected
    concrete_type = type(self)
    if concrete_type is _ImageLabelDataset:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )

    checked_transform = self._validate_transform(transform)
    self._transform = checked_transform

    checked_target_transform = self._validate_transform(target_transform)
    self._target_transform = checked_target_transform

MedicalFolderDataset

MedicalFolderDataset(data_modalities, target_modalities, transform=None, target_transform=None)

Bases: Dataset

Parameters:

Name Type Description Default
data_modalities Union[str, Iterable[str]]

The data modalities to use.

required
target_modalities Optional[Union[str, Iterable[str]]]

The target modalities to use.

required
transform Transform

The transform to apply to the data. Defaults to None.

None
target_transform Transform

The transform to apply to the target data. Defaults to None.

None

Raises:

Type Description
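FedbiomedValueError

If the input modalities are not valid, if data_modalities is empty, or if target_transform is given but target_modalities is None.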
Source code in fedbiomed/common/dataset/_medical_folder_dataset.py
def __init__(
    self,
    data_modalities: Union[str, Iterable[str]],
    target_modalities: Optional[Union[str, Iterable[str]]],
    transform: Transform = None,
    target_transform: Transform = None,
):
    """Initializes the MedicalFolderDataset.

    Args:
        data_modalities (Union[str, Iterable[str]]): The data modalities to use.
        target_modalities (Optional[Union[str, Iterable[str]]]): The target
            modalities to use, or None when there is no target.
        transform (Transform, optional): The transform to apply to the data.
            Defaults to None.
        target_transform (Transform, optional): The transform to apply to the
            target data. Defaults to None.

    Raises:
        FedbiomedValueError:
        - If the input modalities are not valid.
        - If `data_modalities` is empty.
        - If `target_transform` is given but `target_modalities` is None
    """
    if not data_modalities:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: `data_modalities` cannot be empty"
        )

    self._data_modalities = self._normalize_modalities(data_modalities)

    # Target modalities are optional: keep None as-is, normalize otherwise
    if target_modalities is None:
        self._target_modalities = None
    else:
        self._target_modalities = self._normalize_modalities(target_modalities)

    self._transform = self._validate_transform(
        transform=transform,
        modalities=self._data_modalities,
    )

    # With target modalities, the target transform is validated against them
    if self._target_modalities is not None:
        self._target_transform = self._validate_transform(
            transform=target_transform,
            modalities=self._target_modalities,
        )
        return

    # Without target modalities, a target transform has nothing to apply to
    if target_transform is not None:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: `target_transform` provided but "
            "`target_modalities` is None"
        )
    self._target_transform = None

Functions

complete_initialization
complete_initialization(controller_kwargs, to_format)

Finalize initialization of object to be able to recover items

Parameters:

Name Type Description Default
controller_kwargs Dict[str, Any]

arguments to create controller

required
to_format DataReturnFormat

format associated to expected return format

required
Source code in fedbiomed/common/dataset/_medical_folder_dataset.py
def complete_initialization(
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Finalize initialization of object to be able to recover items

    Sets the return format, initializes the controller, then fetches the
    first sample and validates the data transform (and the target transform
    when target modalities are defined) against it.

    Args:
        controller_kwargs: arguments to create controller
        to_format: format associated to expected return format
    """
    self.to_format = to_format
    self._init_controller(controller_kwargs=controller_kwargs)

    # Recover sample and validate consistency of transforms
    first_sample = self._controller.get_sample(0)

    data_part = {name: first_sample[name] for name in self._data_modalities}
    self._validate_format_and_transformations(
        data_part,
        transform=self._transform,
    )

    # Nothing more to check when the dataset has no target modalities
    if self._target_modalities is None:
        return

    target_part = {name: first_sample[name] for name in self._target_modalities}
    self._validate_format_and_transformations(
        target_part,
        transform=self._target_transform,
        is_target=True,
    )

MnistDataset

MnistDataset(transform=None, target_transform=None)

Bases: _ImageLabelDataset

Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Store the validated data and target transforms.

    Args:
        transform: optional callable checked with `_validate_transform`
            and stored as `_transform`
        target_transform: optional callable checked with
            `_validate_transform` and stored as `_target_transform`

    Raises:
        FedbiomedValueError: if the object being built is exactly an
            `_ImageLabelDataset` rather than one of its subclasses
    """
    # Only subclasses may be instantiated; the exact base type is rejected
    concrete_type = type(self)
    if concrete_type is _ImageLabelDataset:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )

    checked_transform = self._validate_transform(transform)
    self._transform = checked_transform

    checked_target_transform = self._validate_transform(target_transform)
    self._target_transform = checked_target_transform

NativeDataset

NativeDataset(dataset, target=None)

Bases: Dataset

A class representing a native dataset.

This class wraps around datasets from popular ML libraries like PyTorch and scikit-learn, allowing them to be used seamlessly in a customized TrainingPlan for FedBiomed.

Parameters:

Name Type Description Default
dataset

Native dataset object from a ML library (e.g., PyTorch, scikit-learn).

required
target Optional[Any]

Optional target data if not included in the dataset.

None

Raises: FedbiomedError: if dataset does not implement collection interface, or if target length does not match dataset length, or if both dataset and argument provide targets.

Source code in fedbiomed/common/dataset/_native_dataset.py
def __init__(self, dataset, target: Optional[Any] = None):
    """Run basic sanity checks on the wrapped dataset and keep references to it.

    Only the first item of `dataset` is accessed here, to find out whether
    items are tuples (treated as supervised) or not.

    Args:
        dataset: Native dataset object from a ML library (e.g., PyTorch, scikit-learn).
        target: Optional target data if not included in the dataset.
    Raises:
        FedbiomedError: if dataset does not implement collection interface,
            or if target length does not match dataset length,
            or if both dataset and argument provide targets.
    """
    # The dataset must be usable as a sized, indexable collection
    required_dunders = ("__len__", "__getitem__")
    if not all(hasattr(dataset, dunder) for dunder in required_dunders):
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Dataset must implement __len__ and __getitem__."
        )

    self._dataset = dataset

    # Fetch a single item to tell supervised from unsupervised datasets
    try:
        first_item = dataset[0]
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to get a sample item from dataset. Details: {e}"
        ) from e

    self._is_supervised = isinstance(first_item, tuple)

    # Targets may come from the dataset or from the argument, never both
    target_given = target is not None
    if target_given and self._is_supervised:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Target found both in dataset and in 'target' argument."
        )

    # A sized target must have exactly one entry per dataset item
    if hasattr(target, "__len__"):
        if len(target) != len(dataset):
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Length of target ({len(target)}) does not match dataset ({len(dataset)})."
            )

    self._target = target  # may be None
    self._to_format: Optional[DataReturnFormat] = None

Functions

complete_initialization
complete_initialization(controller_kwargs, to_format)

Select data and target, and check if they can be converted to requested format.

Parameters:

Name Type Description Default
controller_kwargs Dict[str, Any]

keyword arguments for controller (not used here).

required
to_format DataReturnFormat

format associated to expected return format.

required

Raises: FedbiomedError: if there is a problem converting dataset items to requested format.

Source code in fedbiomed/common/dataset/_native_dataset.py
def complete_initialization(
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Pick the first data/target pair and check it converts to the requested format.

    Args:
        controller_kwargs: keyword arguments for controller (not used here).
        to_format: format associated to expected return format.
    Raises:
        FedbiomedError: if there is a problem converting dataset items to requested format.
    """

    self._to_format = to_format
    self._converter = self._get_format_conversion_callable()

    # Locate the first data item and, when one exists, its target
    if self._is_supervised:
        data, target = self._dataset[0]
    else:
        data = self._dataset[0]
        target = None if self._target is None else self._target[0]

    # Data is always validated; target only when there is one
    to_validate = [data]
    if target is not None:
        to_validate.append(target)

    for item in to_validate:
        try:
            self._validate_format_conversion(item)
        except FedbiomedError as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to convert dataset items to "
                f"requested format {to_format}. Details: {e}"
            ) from e

TabularDataset

TabularDataset(input_columns, target_columns, transform=None, target_transform=None)

Bases: Dataset

Parameters:

Name Type Description Default
input_columns Iterable | int | str

Columns to be used as input features

required
target_columns Iterable | int | str

Columns to be used as target

required
transform Optional[Callable]

Transformation to be applied to input features

None
target_transform Optional[Callable]

Transformation to be applied to target

None

Raises: FedbiomedValueError: if input_columns or target_columns are not valid; FedbiomedValueError: if transform or target_transform are not valid callables

Source code in fedbiomed/common/dataset/_tabular_dataset.py
def __init__(
    self,
    input_columns: Iterable | int | str,
    target_columns: Iterable | int | str,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
) -> None:
    """Build the dataset from column selections and optional transforms.

    Args:
        input_columns: Columns to be used as input features
        target_columns: Columns to be used as target
        transform: Transformation to be applied to input features
        target_transform: Transformation to be applied to target
    Raises:
        FedbiomedValueError: if `input_columns` or `target_columns` are not valid
        FedbiomedValueError: if `transform` or `target_transform` are not valid callables
    """

    # Both transforms go through the same validation before being stored
    checked_transform = self._validate_transform(transform=transform)
    self._transform = checked_transform

    checked_target_transform = self._validate_transform(transform=target_transform)
    self._target_transform = checked_target_transform

    # Column selections are stored as given
    self._input_columns = input_columns
    self._target_columns = target_columns

Functions

complete_initialization
complete_initialization(controller_kwargs, to_format)

Finalize initialization of object to be able to recover items

Parameters:

Name Type Description Default
controller_kwargs Dict[str, Any]

arguments to create controller

required
to_format DataReturnFormat

format associated to expected return format

required
Source code in fedbiomed/common/dataset/_tabular_dataset.py
def complete_initialization(  # type: ignore
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Finalize initialization of object to be able to recover items

    Sets the return format, initializes the controller, fetches the first
    sample, checks that it is a single row, and validates the input and
    target transforms against it.

    Args:
        controller_kwargs: arguments to create controller
        to_format: format associated to expected return format

    Raises:
        FedbiomedError: if the sample obtained from the controller has
            more than one row
    """
    self.to_format = to_format

    self._init_controller(controller_kwargs=controller_kwargs)

    sample = self._controller.get_sample(0)  # type: ignore

    n_rows, _ = sample.shape
    if n_rows > 1:
        raise FedbiomedError(
            f"{ErrorNumbers.FB633.value}: TabularDataset currently only supports "
            "row-wise samples. Sample obtained from controller has multiple rows."
        )

    self._validate_format_and_transformations(
        self._get_inputs_from_sample(sample), transform=self._transform
    )
    # Targets must be checked against the target transform: validating them
    # with the input transform would leave `target_transform` unchecked.
    # NOTE(review): MedicalFolderDataset also passes `is_target=True` for its
    # target validation; confirm whether that flag should be passed here too.
    self._validate_format_and_transformations(
        self._get_targets_from_sample(sample), transform=self._target_transform
    )