Classes that simplify imports from fedbiomed.common.dataset
Classes
CustomDataset
Bases: Dataset
A class representing a custom dataset.
This class allows users to create and manage their own datasets for use in federated learning scenarios.
Functions
complete_initialization
complete_initialization(controller_kwargs, to_format)
Finalize initialization of object to be able to recover items
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
controller_kwargs | Dict[str, Any] | controller arguments; must contain the key `root` giving the path to the dataset | required |
to_format | DataReturnFormat | format associated to expected return format | required |
Source code in fedbiomed/common/dataset/_custom_dataset.py
def complete_initialization(
    self, controller_kwargs: Dict[str, Any], to_format: DataReturnFormat
) -> None:
    """Finalize initialization of object to be able to recover items

    Stores the dataset location and the requested return format, runs the
    user-defined `read`, then probes the first item twice: once through
    `get_item` (raw user output) and once through `self[0]`.

    Args:
        controller_kwargs: controller arguments; must contain the key `root`
            giving the path to the dataset
        to_format: format associated to expected return format

    Raises:
        FedbiomedError: if `root` is missing from `controller_kwargs` (or is
            None), if `read` or `get_item` raises, if `get_item` does not
            return a 2-element tuple, or if `self[0]` raises
    """
    self.path = controller_kwargs.get("root")
    if self.path is None:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Custom Dataset ERROR: 'root' must be provided in controller_kwargs to specify dataset location."
        )
    self._to_format = to_format

    # Call user defined read function to read the dataset
    try:
        self.read()
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to read "
            f"from dataset using read method. Please see error: {e}"
        ) from e

    # Probe the raw user implementation first, so that a failure here is
    # reported separately from a failure of the item access below
    try:
        sample = self.get_item(0)
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to retrieve item "
            f"from dataset using get_item method. Please see error: {e}"
        ) from e

    if not isinstance(sample, tuple) or len(sample) != 2:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: get_item method must return a tuple of two elements"
            f" (data, target), but got {type(sample).__name__} with"
            f" length {len(sample) if isinstance(sample, (list, tuple)) else 'N/A'}"
        )

    # Following access is just to check that dataset is well implemented
    # and that it returns correct data type with respect to `to_format`
    try:
        sample = self[0]
    except Exception as e:
        # This step goes through `__getitem__`, not directly through
        # `get_item`, so the message must not blame `get_item` alone
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to retrieve item "
            f"from dataset in the requested format using __getitem__. "
            f"Please see error: {e}"
        ) from e
get_item abstractmethod
get_item(index)
Retrieves a sample and its label by index.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
index | int | The index of the sample to retrieve. | required |
Source code in fedbiomed/common/dataset/_custom_dataset.py
@abstractmethod
def get_item(self, index):
    """Retrieves a sample and its label by index.

    Abstract hook: the body is empty here and is meant to be provided by
    the user-defined subclass. `complete_initialization` rejects any
    return value that is not a 2-element tuple `(data, target)`.

    Args:
        index (int): The index of the sample to retrieve.
    """
    return None
read abstractmethod
read()
Reads the dataset from the specified path.
This method should be implemented by subclasses to load the dataset from the given path and prepare it for use.
Source code in fedbiomed/common/dataset/_custom_dataset.py
@abstractmethod
def read(self) -> None:
    """Reads the dataset from the specified path.

    Abstract hook: subclasses implement it to load the dataset and prepare
    it for use. `complete_initialization` sets `self.path` before calling
    this method.
    """
    return None
Dataset
Bases: ABC
Attributes
target_transform class-attribute instance-attribute
target_transform = None
to_format property writable
to_format
transform class-attribute instance-attribute
transform = None
Functions
apply_transforms
apply_transforms(sample)
Apply transforms to sample in place
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
sample | Dict[str, Any] | sample returned by `self._controller.get_sample` | required |
Raises:
| Type | Description |
|---|---|
FedbiomedError | if there is a problem applying `transform` or `target_transform` |
Source code in fedbiomed/common/dataset/_dataset.py
def apply_transforms(self, sample: Dict[str, Any]) -> Dict[str, Any]:
    """Apply transforms to sample in place

    For both `data` and `target` entries: the value is converted with the
    format conversion callable, cast with the default types callable, passed
    through the matching transform, then cast with the default types callable
    once more (on the transform output).

    Args:
        sample: sample returned by `self._controller.get_sample`

    Returns:
        The same `sample` dict that was passed in, after in-place modification

    Raises:
        FedbiomedError: if there is a problem applying `transform` or
            `target_transform`, or applying the default types to their output
    """
    # (sample key, attribute holding the transform, name used in messages)
    entries = (
        ("data", "_transform", "transform"),
        ("target", "_target_transform", "target_transform"),
    )
    for key, transform_attr, transform_name in entries:
        try:
            # Attribute lookup stays inside the `try` so that any failure is
            # reported as a FedbiomedError
            sample[key] = getattr(self, transform_attr)(
                self._get_default_types_callable()(
                    self._get_format_conversion_callable()(sample[key])
                )
            )
        except Exception as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to apply `{transform_name}` to "
                f"`{key}` in sample in {self._to_format.value} format."
            ) from e

        try:
            sample[key] = self._get_default_types_callable()(sample[key])
        except Exception as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to apply default training plan types to `{key}` "
                f"in sample in {self._to_format.value} format."
            ) from e

    return sample
complete_initialization abstractmethod
complete_initialization()
Finalize initialization of object to be able to recover items
Source code in fedbiomed/common/dataset/_dataset.py
@abstractmethod
def complete_initialization(self) -> None:
    """Finalize initialization of object to be able to recover items

    Abstract hook with an empty body. The original note on this method
    describes the intent as: recover a sample and validate consistency of
    transforms.
    """
    return None
ImageFolderDataset
ImageFolderDataset(transform=None, target_transform=None)
Bases: _ImageLabelDataset
Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Validate and store the optional transforms.

    Args:
        transform: optional callable applied to the data
        target_transform: optional callable applied to the target

    Raises:
        FedbiomedValueError: if `_ImageLabelDataset` itself, rather than one
            of its subclasses, is being instantiated
    """
    # Exact type comparison on purpose: subclasses must pass this guard
    is_base_class = type(self) is _ImageLabelDataset
    if is_base_class:
        message = (
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )
        raise FedbiomedValueError(message)

    self._transform = self._validate_transform(transform)
    self._target_transform = self._validate_transform(target_transform)
MedNistDataset
MedNistDataset(transform=None, target_transform=None)
Bases: _ImageLabelDataset
Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Validate and store the optional transforms.

    Args:
        transform: optional callable applied to the data
        target_transform: optional callable applied to the target

    Raises:
        FedbiomedValueError: if `_ImageLabelDataset` itself, rather than one
            of its subclasses, is being instantiated
    """
    # Exact type comparison on purpose: subclasses must pass this guard
    is_base_class = type(self) is _ImageLabelDataset
    if is_base_class:
        message = (
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )
        raise FedbiomedValueError(message)

    self._transform = self._validate_transform(transform)
    self._target_transform = self._validate_transform(target_transform)
MedicalFolderDataset
MedicalFolderDataset(data_modalities, target_modalities, transform=None, target_transform=None)
Bases: Dataset
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
data_modalities | Union[str, Iterable[str]] | The data modalities to use. | required |
target_modalities | Optional[Union[str, Iterable[str]]] | The target modalities to use. | required |
transform | Transform | The transform to apply to the data. Defaults to None. | None |
target_transform | Transform | The transform to apply to the target data. Defaults to None. | None |
Raises:
| Type | Description |
|---|---|
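FedbiomedValueError | If the input modalities are not valid, if `data_modalities` is empty, or if `target_transform` is given but `target_modalities` is None |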
Source code in fedbiomed/common/dataset/_medical_folder_dataset.py
def __init__(
    self,
    data_modalities: Union[str, Iterable[str]],
    target_modalities: Optional[Union[str, Iterable[str]]],
    transform: Transform = None,
    target_transform: Transform = None,
):
    """Initializes the MedicalFolderDataset.

    Args:
        data_modalities (Union[str, Iterable[str]]): The data modalities to use.
        target_modalities (Optional[Union[str, Iterable[str]]]): The target modalities to use.
        transform (Transform, optional): The transform to apply to the data. Defaults to None.
        target_transform (Transform, optional): The transform to apply to the target data. Defaults to None.

    Raises:
        FedbiomedValueError:
            - If the input modalities are not valid.
            - If `data_modalities` is empty.
            - If `target_transform` is given but `target_modalities` is None
    """
    if not data_modalities:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: `data_modalities` cannot be empty"
        )

    self._data_modalities = self._normalize_modalities(data_modalities)
    if target_modalities is None:
        self._target_modalities = None
    else:
        self._target_modalities = self._normalize_modalities(target_modalities)

    self._transform = self._validate_transform(
        transform=transform,
        modalities=self._data_modalities,
    )

    if self._target_modalities is not None:
        self._target_transform = self._validate_transform(
            transform=target_transform,
            modalities=self._target_modalities,
        )
        return

    # No target modalities: a target transform would have nothing to act on
    if target_transform is not None:
        raise FedbiomedValueError(
            f"{ErrorNumbers.FB632.value}: `target_transform` provided but "
            "`target_modalities` is None"
        )
    self._target_transform = None
Functions
complete_initialization
complete_initialization(controller_kwargs, to_format)
Finalize initialization of object to be able to recover items
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
controller_kwargs | Dict[str, Any] | arguments to create controller | required |
to_format | DataReturnFormat | format associated to expected return format | required |
Source code in fedbiomed/common/dataset/_medical_folder_dataset.py
def complete_initialization(
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Finalize initialization of object to be able to recover items

    Sets the return format, creates the controller, then uses the first
    sample to validate format and transforms for the data modalities and,
    when target modalities are configured, for the target modalities.

    Args:
        controller_kwargs: arguments to create controller
        to_format: format associated to expected return format
    """
    self.to_format = to_format
    self._init_controller(controller_kwargs=controller_kwargs)

    # Recover sample and validate consistency of transforms
    first_sample = self._controller.get_sample(0)

    data_part = {name: first_sample[name] for name in self._data_modalities}
    self._validate_format_and_transformations(
        data_part,
        transform=self._transform,
    )

    if self._target_modalities is None:
        return

    target_part = {name: first_sample[name] for name in self._target_modalities}
    self._validate_format_and_transformations(
        target_part,
        transform=self._target_transform,
        is_target=True,
    )
MnistDataset
MnistDataset(transform=None, target_transform=None)
Bases: _ImageLabelDataset
Source code in fedbiomed/common/dataset/_simple_dataset.py
def __init__(
    self,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
):
    """Validate and store the optional transforms.

    Args:
        transform: optional callable applied to the data
        target_transform: optional callable applied to the target

    Raises:
        FedbiomedValueError: if `_ImageLabelDataset` itself, rather than one
            of its subclasses, is being instantiated
    """
    # Exact type comparison on purpose: subclasses must pass this guard
    is_base_class = type(self) is _ImageLabelDataset
    if is_base_class:
        message = (
            f"{ErrorNumbers.FB632.value}: "
            "`_ImageLabelDataset` cannot be instantiated directly"
        )
        raise FedbiomedValueError(message)

    self._transform = self._validate_transform(transform)
    self._target_transform = self._validate_transform(target_transform)
NativeDataset
NativeDataset(dataset, target=None)
Bases: Dataset
A class representing a native dataset.
This class wraps around datasets from popular ML libraries like PyTorch and scikit-learn, allowing them to be used seamlessly in a customized TrainingPlan for FedBiomed.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset | | Native dataset object from an ML library (e.g., PyTorch, scikit-learn). | required |
target | Optional[Any] | Optional target data if not included in the dataset. | None |
Raises: FedbiomedError: if dataset does not implement collection interface, or if target length does not match dataset length, or if both dataset and argument provide targets.
Source code in fedbiomed/common/dataset/_native_dataset.py
def __init__(self, dataset, target: Optional[Any] = None):
    """Initialize with basic checks, without loading data to memory.

    Only the first item of `dataset` is read, to decide whether the dataset
    already carries targets (the item is a tuple) or not.

    Args:
        dataset: Native dataset object from a ML library (e.g., PyTorch, scikit-learn).
        target: Optional target data if not included in the dataset.

    Raises:
        FedbiomedError: if dataset does not implement collection interface,
            if its first item cannot be read,
            or if target length does not match dataset length,
            or if both dataset and argument provide targets.
    """
    # Check collection interface
    has_collection_api = hasattr(dataset, "__len__") and hasattr(
        dataset, "__getitem__"
    )
    if not has_collection_api:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Dataset must implement __len__ and __getitem__."
        )
    self._dataset = dataset

    # Probe one sample to determine supervised/unsupervised shape
    try:
        first_item = dataset[0]
    except Exception as e:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Failed to get a sample item from dataset. Details: {e}"
        ) from e
    self._is_supervised = isinstance(first_item, tuple)

    # Targets may come from the dataset or from the argument, never both
    if target is not None and self._is_supervised:
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Target found both in dataset and in 'target' argument."
        )

    # A sized target must line up with the dataset, one entry per item
    if hasattr(target, "__len__") and len(target) != len(dataset):
        raise FedbiomedError(
            f"{ErrorNumbers.FB632.value}: Length of target ({len(target)}) does not match dataset ({len(dataset)})."
        )

    self._target = target  # may be None
    self._to_format: Optional[DataReturnFormat] = None
Functions
complete_initialization
complete_initialization(controller_kwargs, to_format)
Select data and target, and check if they can be converted to requested format.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
controller_kwargs | Dict[str, Any] | keyword arguments for controller (not used here). | required |
to_format | DataReturnFormat | format associated to expected return format. | required |
Raises: FedbiomedError: if there is a problem converting dataset items to requested format.
Source code in fedbiomed/common/dataset/_native_dataset.py
def complete_initialization(
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Select data and target, and check if they can be converted to requested format.

    The first dataset item supplies the data (and the target, for a
    supervised dataset); otherwise the target is the first entry of the
    separately provided target, if there is one.

    Args:
        controller_kwargs: keyword arguments for controller (not used here).
        to_format: format associated to expected return format.

    Raises:
        FedbiomedError: if there is a problem converting dataset items to requested format.
    """
    self._to_format = to_format
    self._converter = self._get_format_conversion_callable()

    first_entry = self._dataset[0]
    if self._is_supervised:
        data, target = first_entry
    else:
        data = first_entry
        target = None if self._target is None else self._target[0]

    # Data is always checked; the target only when there is one
    to_check = [data] if target is None else [data, target]
    for item in to_check:
        try:
            self._validate_format_conversion(item)
        except FedbiomedError as e:
            raise FedbiomedError(
                f"{ErrorNumbers.FB632.value}: Failed to convert dataset items to "
                f"requested format {to_format}. Details: {e}"
            ) from e
TabularDataset
TabularDataset(input_columns, target_columns, transform=None, target_transform=None)
Bases: Dataset
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
input_columns | Iterable | int | str | Columns to be used as input features | required |
target_columns | Iterable | int | str | Columns to be used as target | required |
transform | Optional[Callable] | Transformation to be applied to input features | None |
target_transform | Optional[Callable] | Transformation to be applied to target | None |
Raises: FedbiomedValueError: if `input_columns` or `target_columns` are not valid; FedbiomedValueError: if `transform` or `target_transform` are not valid callables
Source code in fedbiomed/common/dataset/_tabular_dataset.py
def __init__(
    self,
    input_columns: Iterable | int | str,
    target_columns: Iterable | int | str,
    transform: Optional[Callable] = None,
    target_transform: Optional[Callable] = None,
) -> None:
    """Constructor of the class

    Both transforms go through `_validate_transform`; the column selections
    are stored as given, without any check in this method.

    Args:
        input_columns: Columns to be used as input features
        target_columns: Columns to be used as target
        transform: Transformation to be applied to input features
        target_transform: Transformation to be applied to target

    Raises:
        FedbiomedValueError: if `input_columns` or `target_columns` are not valid
        FedbiomedValueError: if `transform` or `target_transform` are not valid callables
    """
    # Transformation checks
    checked_transform = self._validate_transform(transform=transform)
    self._transform = checked_transform
    checked_target_transform = self._validate_transform(transform=target_transform)
    self._target_transform = checked_target_transform

    # Column selections
    self._input_columns, self._target_columns = input_columns, target_columns
Functions
complete_initialization
complete_initialization(controller_kwargs, to_format)
Finalize initialization of object to be able to recover items
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
controller_kwargs | Dict[str, Any] | arguments to create controller | required |
to_format | DataReturnFormat | format associated to expected return format | required |
Source code in fedbiomed/common/dataset/_tabular_dataset.py
def complete_initialization(  # type: ignore
    self,
    controller_kwargs: Dict[str, Any],
    to_format: DataReturnFormat,
) -> None:
    """Finalize initialization of object to be able to recover items

    Sets the return format, creates the controller, then uses the first
    sample to check that samples are row-wise and that inputs and targets
    are each compatible with their own transform.

    Args:
        controller_kwargs: arguments to create controller
        to_format: format associated to expected return format

    Raises:
        FedbiomedError: if the sample obtained from the controller has more
            than one row
    """
    self.to_format = to_format
    self._init_controller(controller_kwargs=controller_kwargs)

    sample = self._controller.get_sample(0)  # type: ignore
    n_rows, _ = sample.shape
    if n_rows > 1:
        raise FedbiomedError(
            f"{ErrorNumbers.FB633.value}: TabularDataset currently only supports "
            "row-wise samples. Sample obtained from controller has multiple rows."
        )

    self._validate_format_and_transformations(
        self._get_inputs_from_sample(sample), transform=self._transform
    )
    # Targets must be checked against `target_transform`: checking them
    # against the input transform would leave `target_transform` unvalidated
    self._validate_format_and_transformations(
        self._get_targets_from_sample(sample), transform=self._target_transform
    )