Classes that simplify imports from fedbiomed.common.dataset_reader
Classes
CsvReader
CsvReader(path, has_header='auto', delimiter=None)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| path | Path | The path of the csv file that contains the dataset. | required |
| has_header | str \| bool | Whether the file has a header. Defaults to 'auto', in which case the reader tries to detect by itself whether the file has a header. | 'auto' |
| delimiter | Optional[str] | The delimiter used in the csv file. Defaults to None, in which case the reader tries to detect the delimiter by itself. | None |
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def __init__(
    self,
    path: Path,
    has_header: str | bool = "auto",
    delimiter: Optional[str] = None,
) -> None:
    """Build the csv reader and load the file right away.

    Args:
        path: Location of the csv file that holds the dataset.
        has_header: Whether the file has a header. With the default 'auto',
            the header flag is stored as `None` so that the reader can try
            to detect it by itself.
        delimiter: The delimiter used in the csv file. With the default
            `None`, the reader tries to detect the delimiter by itself.
    """
    self._path = path
    self._delimiter = delimiter

    # 'auto' is stored as None; any other value is kept exactly as given
    if has_header == "auto":
        self.header: bool | None = None
    else:
        self.header = has_header

    # Pre-parse the CSV file to determine its delimiter and header
    # (must run after the attributes above are set and before reading)
    self._pre_parse()

    # Load the data and remember the column names
    self.data = self._read()
    self.columns = list(self.data.columns)

    # Cache the shape and the number of rows of the loaded data
    self._shape = self.data.shape
    self._len = self._shape[0]
Attributes
columns instance-attribute
columns = list(columns)
data instance-attribute
data = _read()
header instance-attribute
header = None if has_header == 'auto' else has_header
Functions
get
get(indexes, columns=None)
Gets the specified rows and columns in the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| indexes | int \| Iterable | Row indexes to retrieve. | required |
| columns | Optional[Iterable \| int \| str] | (Optional) list of columns to retrieve. | None |
Returns: Polars DataFrame: The specified dataframe.
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def get(
    self,
    indexes: int | Iterable,
    columns: Optional[Iterable | int | str] = None,
) -> pl.DataFrame:
    """Retrieve the requested rows and columns of the dataset.

    Args:
        indexes: Row indexes to retrieve.
        columns: (Optional) columns to retrieve.

    Returns:
        Polars DataFrame: The result of `_get_entry` for this selection.
    """
    # Delegate the whole selection to the internal entry accessor
    selection = self._get_entry(indexes=indexes, columns=columns)
    return selection
len
len()
Get number of samples
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def len(self) -> int:
    """Return the number of samples (rows) recorded for the dataset."""
    n_samples = self._len
    return n_samples
normalize_columns
normalize_columns(columns)
Validates columns and returns them in type list
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| columns | Iterable \| int \| str | Columns to extract, can be an iterable of column names, a single column name, or an integer index. | required |
Returns: A list of column names.
Raises:
| Type | Description |
|---|---|
| FedbiomedUserInputError | If the input does not match the types expected |
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def normalize_columns(self, columns: Iterable | int | str) -> list[str]:
"""Validates `columns` and returns them in type `list`
Args:
columns: Columns to extract, can be an iterable of column names, a single
column name, or an integer index.
Returns:
A list of column names.
Raises:
FedbiomedUserInputError: If the input does not match the types expected
"""
if isinstance(columns, str) or isinstance(columns, int):
columns = [columns]
# if columns is a list of int, convert it to a list of column names
# (auto-generated by polars as column_0, column_1, etc. if there is no header)
if all(isinstance(item, int) for item in columns):
n_cols = len(self.columns)
_inter = list(filter(lambda x: 0 > x or x > n_cols, columns))
if any(_inter):
raise FedbiomedUserInputError(
f"Column index(es) {_inter} is out of range (0 to {n_cols - 1})"
)
columns = [self.columns[i] for i in columns]
_faulty_col = list(filter(lambda x: x not in self.columns, columns))
if any(_faulty_col):
msg = f"Cannot read columns {_faulty_col}: file does not contain those columns specified"
raise FedbiomedUserInputError(msg)
return columns
shape
shape()
Returns the shape of the csv dataset.
Computed before applying transforms or conversion to other format.
Returns:
| Type | Description |
|---|---|
| dict | Dictionary with the shape and other necessary info for the dataset |
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def shape(self):
    """Report the shape of the csv dataset.

    The value is the one cached on the instance, i.e. computed before any
    transform or conversion to another format is applied.

    Returns:
        Dictionary mapping the key "csv" to the cached shape.
    """
    csv_shape = self._shape
    return {"csv": csv_shape}
to_numpy
to_numpy()
Returns the data as a Numpy ndarray.
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def to_numpy(self):
    """Convert the loaded data to a Numpy ndarray and return it."""
    frame = self.data
    return frame.to_numpy()
to_pandas
to_pandas()
Returns the data as a Pandas Dataframe.
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def to_pandas(self) -> pd.DataFrame:
    """Convert the loaded data to a Pandas Dataframe and return it."""
    frame = self.data
    return frame.to_pandas()
unsafe_to_torch
unsafe_to_torch()
This is an unsafe method that returns the data as a Torch Tensor.
Warning: This method requires that columns have homogeneous data types. Having mixed types will raise an error.
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def unsafe_to_torch(self):
    """This is an unsafe method that returns the data as a Torch Tensor.

    Warning: This method requires that columns have homogeneous data types. Having
    mixed types will raise an error.

    Returns:
        The result of calling `to_torch()` on the loaded data.
    """
    # The conversion result must be returned; it was previously discarded,
    # which made the method always return None
    return self.data.to_torch()
validate
validate()
Validate the path of the CSV file.
Raises:
| Type | Description |
|---|---|
| FedbiomedError | If the path is invalid |
Source code in fedbiomed/common/dataset_reader/_csv_reader.py
def validate(self) -> None:
    """Check that the configured CSV path points to an existing file.

    Raises:
        FedbiomedError: If the path is not an existing regular file
    """
    # Nothing to report when the file is there
    if os.path.isfile(self._path):
        return
    raise FedbiomedError(f"error: cannot find csv file {self._path}")
NiftiReader
Attributes
data_type class-attribute instance-attribute
data_type = Nifti1Image
Functions
read classmethod
read(path)
Reads the NIfTI file and returns the loaded image object (no tensor conversion or transform is applied).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| path | Union[str, Path] | Path to the NIfTI file (.nii or .nii.gz) | required |
Returns:
| Type | Description |
|---|---|
| Nifti1Image | The loaded NIfTI image object. |
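Raises:
| Type | Description |
|---|---|
| TypeError | If the path is neither a string nor a Path. |
| FileNotFoundError | If the file does not exist. |
| ValueError | If the file is not a valid NIfTI file. |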
Source code in fedbiomed/common/dataset_reader/_nifti_reader.py
@classmethod
def read(cls, path: Union[str, Path]) -> nib.Nifti1Image:
    """Reads the NIfTI file and returns the loaded image object.

    No tensor conversion or transform is applied here: the object returned
    by `nib.load` is handed back as is.

    Args:
        path (Union[str, Path]): Path to the NIfTI file (.nii or .nii.gz)

    Returns:
        nib.Nifti1Image: The image object returned by `nib.load`.

    Raises:
        TypeError: If `path` is neither a string nor a `Path`.
        FileNotFoundError: If the file does not exist.
        ValueError: If the file is not a valid NIfTI file.
    """
    # IMPORTANT: Read function does not return header. It may be useful in the future.
    if isinstance(path, str):
        path = Path(path)
    elif not isinstance(path, Path):
        raise TypeError(f"Expected path to be a string or Path, got {type(path)}")

    # `path` is guaranteed to be a Path from here on
    cls.validate(path)
    return nib.load(str(path))
validate staticmethod
validate(path)
Validate the file path and extension.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| path | Path | Path to the NIfTI file. | required |
Source code in fedbiomed/common/dataset_reader/_nifti_reader.py
@staticmethod
def validate(path: Path) -> None:
"""Validate the file path and extension.
Args:
path (Path): Path to the NIfTI file.
"""
if not path.exists():
raise FileNotFoundError(f"NIfTI file does not exist: {path}")
if not path.is_file():
raise ValueError(f"Provided path is not a file: {path}")
if path.suffix not in {".nii", ".gz"} and not path.name.endswith(".nii.gz"):
raise ValueError(f"File must be .nii or .nii.gz: {path.name}")