Classes that simplify imports from fedbiomed.common.analytics
Attributes
AGGREGATORS_MAP module-attribute
AGGREGATORS_MAP = {}
Classes
AnalyticsOrchestrator
Orchestrates the computation of analytics over a dataset.
Functions
compute_stats
compute_stats(dataset, dataset_schema=None, stats=None, stats_args=None)
Computes the requested statistics over the dataset.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| dataset | Dataset | The dataset to compute statistics for. | required |
| dataset_schema | Optional[Union[str, List[str], Dict[str, Any]]] | Selection to filter the schema (e.g. subset of columns/keys). | None |
| stats | Optional[List[str]] | Default list of statistics to compute (e.g. ['mean', 'std']). | None |
| stats_args | Optional[Dict[str, Any]] | Specific arguments for statistics, structured matching the schema. e.g. {'image': {'histogram': {'bin_edges': [...]}}} | None |
Returns:
| Type | Description |
|---|---|
| Any | The computed statistics. |
Raises:
| Type | Description |
|---|---|
| FedbiomedError | If both 'stats' and 'stats_args' are empty/None, if validation fails, or if the dataset is missing required analytics capabilities. |
Source code in fedbiomed/common/analytics/_orchestrator.py
def compute_stats(
    self,
    dataset: "Dataset",
    dataset_schema: Optional[Union[str, List[str], Dict[str, Any]]] = None,
    stats: Optional[List[str]] = None,
    stats_args: Optional[Dict[str, Any]] = None,
) -> Any:
    """Run the requested statistics over every sample of a dataset.

    The dataset is checked for a supported return format and for the
    `analytics_schema` capability, a validated configuration is built from
    the schema and the caller's selections, and an accumulator built from
    that configuration is fed each sample before being finalized.

    Args:
        dataset: The dataset to compute statistics for.
        dataset_schema: Selection used to filter the schema (e.g. a subset
            of columns/keys).
        stats: Default list of statistics to compute (e.g. ['mean', 'std']).
        stats_args: Statistic-specific arguments, structured to match the
            schema, e.g. {'image': {'histogram': {'bin_edges': [...]}}}.

    Returns:
        The computed statistics, as produced by the accumulator's
        `finalize()`.

    Raises:
        FedbiomedError: If both 'stats' and 'stats_args' are empty/None, if
            validation fails, or if the dataset is missing required
            analytics capabilities.
    """
    # Guard 1: only the SKLEARN return format is accepted for analytics
    # (the original note describes this as the numpy-returning format).
    if dataset.to_format != DataReturnFormat.SKLEARN:
        unsupported_msg = (
            f"Dataset format: '{dataset.to_format.value}' is not supported for analytics."
        )
        raise FedbiomedError(unsupported_msg)

    # Guard 2: the dataset has to expose its analytics schema.
    if not hasattr(dataset, "analytics_schema"):
        missing_capability_msg = "Dataset does not implement 'analytics_schema'."
        raise FedbiomedError(missing_capability_msg)

    analytics_schema = dataset.analytics_schema()

    # The sample count is forwarded to the config builder; buffer-backed
    # accumulators such as quantile need the dataset size.
    sample_count = len(dataset)

    stats_config = self._build_and_validate_config(
        analytics_schema,
        dataset_schema,
        stats,
        stats_args,
        sample_count,
    )

    # Build the accumulator from the validated config, stream all samples
    # through it, then collapse it into the final result.
    root_accumulator = self._create_accumulator(stats_config)
    for item in dataset:
        root_accumulator.update(item)

    return root_accumulator.finalize()