CLI Utils

to simplify imports from fedbiomed.node.cli_utils

Attributes

dataset_manager module-attribute

dataset_manager = DatasetManager()

tp_security_manager module-attribute

tp_security_manager = TrainingPlanSecurityManager()

Functions

add_database

add_database(interactive=True, path=None, name=None, tags=None, description=None, data_type=None, dataset_parameters=None)

Adds a dataset to the node database.

Also interactively queries the user on the command line (and file browser) for dataset parameters if needed.

Parameters:

Name Type Description Default
interactive bool

Whether to query interactively for dataset parameters even if they are all passed as arguments. Defaults to True.

True
path str

Path to the dataset.

None
name str

Keyword for the dataset.

None
tags str

Comma separated list of tags for the dataset.

None
description str

Human readable description of the dataset.

None
data_type str

Keyword for the data type of the dataset.

None
Source code in fedbiomed/node/cli_utils/_database.py
def add_database(interactive: bool = True,
                 path: str = None,
                 name: str = None,
                 tags: str = None,
                 description: str = None,
                 data_type: str = None,
                 dataset_parameters: dict = None):
    """Adds a dataset to the node database.

    Also interactively queries the user on the command line (and file browser)
    for dataset parameters if needed.

    If `interactive` is `True`, or if any of `path`, `name`, `tags`,
    `description` or `data_type` is missing, the values are (re)built by this
    function: the data type is asked to the user when interactive, and forced
    to `'default'` (MNIST) otherwise.

    If the dataset manager rejects the dataset with an `AssertionError` or a
    `FedbiomedDatasetManagerError`, the process exits with status 1. If it
    raises `FedbiomedDatasetError`, a warning is issued and the function
    returns without listing the datasets.

    Args:
        interactive: Whether to query interactively for dataset parameters
            even if they are all passed as arguments. Defaults to `True`.
        path: Path to the dataset.
        name: Keyword for the dataset.
        tags: Comma separated list of tags for the dataset.
        description: Human readable description of the dataset.
        data_type: Keyword for the data type of the dataset.
        dataset_parameters: Optional dataset-specific parameters, forwarded to
            the medical folder CLI helper and to the dataset manager. An empty
            value is normalised to `None`.
    """

    # normalise falsy values (e.g. an empty dict) to None
    dataset_parameters = dataset_parameters or None
    data_loading_plan = None

    # if all args are provided, just try to load the data
    # if not, ask the user for more information
    if interactive or \
            path is None or \
            name is None or \
            tags is None or \
            description is None or \
            data_type is None:

        print('Welcome to the Fed-BioMed CLI data manager')

        # the `data_type` argument is overridden in this branch
        if interactive is True:
            data_type = validated_data_type_input()
        else:
            data_type = 'default'

        if data_type == 'default':
            tags = ['#MNIST', "#dataset"]
            if interactive is True:
                # keep asking until the user explicitly confirms with 'y'
                while input(f'MNIST will be added with tags {tags} [y/N]').lower() != 'y':
                    pass
                path = validated_path_input(data_type)
            name = 'MNIST'
            description = 'MNIST database'

        elif data_type == 'mednist':
            tags = ['#MEDNIST', "#dataset"]
            if interactive is True:
                # keep asking until the user explicitly confirms with 'y'
                while input(f'MEDNIST will be added with tags {tags} [y/N]').lower() != 'y':
                    pass
                path = validated_path_input(data_type)
            name = 'MEDNIST'
            description = 'MEDNIST dataset'
        else:

            name = input('Name of the database: ')

            tags = input('Tags (separate them by comma and no spaces): ')
            tags = tags.replace(' ', '').split(',')

            description = input('Description: ')

            if data_type == 'medical-folder':
                path, dataset_parameters, data_loading_plan = add_medical_folder_dataset_from_cli(interactive,
                                                                                                  dataset_parameters,
                                                                                                  data_loading_plan)
            elif data_type == 'flamby':
                path = None  # flamby datasets are not identified by their path

                # Select the type of dataset (fed_ixi, fed_heart, etc...)
                available_flamby_datasets = discover_flamby_datasets()
                msg = "Please select the FLamby dataset that you're configuring:\n"
                msg += "\n".join([f"\t{i}) {val}" for i, val in available_flamby_datasets.items()])
                msg += "\nselect: "
                keep_asking_for_input = True
                while keep_asking_for_input:
                    try:
                        flamby_dataset_index = input(msg)
                        flamby_dataset_index = int(flamby_dataset_index)
                        # check that the user inserted a number within the valid range
                        if flamby_dataset_index in available_flamby_datasets.keys():
                            keep_asking_for_input = False
                        else:
                            warnings.warn(f"Please pick a number in the range {list(available_flamby_datasets.keys())}")
                    except ValueError:
                        warnings.warn('Please input a numeric value (integer)')

                # Select the center id
                module = import_module(f".{available_flamby_datasets[flamby_dataset_index]}", package='flamby.datasets')
                n_centers = module.NUM_CLIENTS
                keep_asking_for_input = True
                while keep_asking_for_input:
                    try:
                        center_id = int(input(f"Give a center id between 0 and {str(n_centers-1)}: "))
                        if 0 <= center_id < n_centers:
                            keep_asking_for_input = False
                        else:
                            # tell the user why the question is asked again
                            warnings.warn(f'Please pick a center id between 0 and {str(n_centers-1)}')
                    except ValueError:
                        warnings.warn(f'Please input a numeric value (integer) between 0 and {str(n_centers-1)}')

                # Build the DataLoadingPlan with the selected dataset type and center id
                data_loading_plan = DataLoadingPlan()
                metadata_dlb = FlambyDatasetMetadataBlock()
                metadata_dlb.metadata = {
                    'flamby_dataset_name': available_flamby_datasets[flamby_dataset_index],
                    'flamby_center_id': center_id
                }
                data_loading_plan[FlambyLoadingBlockTypes.FLAMBY_DATASET_METADATA] = metadata_dlb
            else:
                path = validated_path_input(data_type)

        # if a data loading plan was specified, we now ask for the description
        if interactive and data_loading_plan is not None:
            keep_asking_for_input = True
            while keep_asking_for_input:
                desc = input('Please input a short name/description for your data loading plan:')
                if len(desc) < 4:
                    print('Description must be at least 4 characters long.')
                else:
                    keep_asking_for_input = False
            data_loading_plan.desc = desc

    else:
        # all data have been provided at call
        # check few things

        # transform a string with comma(s) into a list of strings
        tags = str(tags).split(',')

        name = str(name)
        description = str(description)

        # unknown data types silently fall back to 'default'
        data_type = str(data_type).lower()
        if data_type not in ['csv', 'default', 'mednist', 'images', 'medical-folder']:
            data_type = 'default'

        # only reported here; the dataset manager call below decides what happens next
        if not os.path.exists(path):
            logger.critical("provided path does not exists: " + path)

    logger.info(f"PATH VALUE {path}")
    # Add database
    try:
        dataset_manager.add_database(name=name,
                                     tags=tags,
                                     data_type=data_type,
                                     description=description,
                                     path=path,
                                     dataset_parameters=dataset_parameters,
                                     data_loading_plan=data_loading_plan)
    except (AssertionError, FedbiomedDatasetManagerError) as e:
        if interactive is True:
            try:
                tkinter.messagebox.showwarning(title='Warning', message=str(e))
            except ModuleNotFoundError:
                warnings.warn(f'[ERROR]: {e}')
        else:
            warnings.warn(f'[ERROR]: {e}')
        exit(1)
    except FedbiomedDatasetError as err:
        warnings.warn(f'[ERROR]: {err} ... Aborting'
                      "\nHint: are you sure you have selected the correct index in Demographic file?")
        # the dataset was not added: do not announce a success below
        return
    print('\nGreat! Take a look at your data:')
    dataset_manager.list_my_data(verbose=True)

approve_training_plan

approve_training_plan(id=None, *, sort_by_date=True)

Approves a given training plan that has either Pending or Rejected status

Parameters:

Name Type Description Default
sort_by_date bool

whether to sort by last modification date. Defaults to True.

True
id Optional[str]

unique id of the training plan to be approved. Providing an id will trigger non-interactive approval.

None
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def approve_training_plan(id: Optional[str] = None, *, sort_by_date: bool = True):
    """Approves a given training plan that has either Pending or Rejected status

    When `id` is given, the training plan is approved directly. Otherwise the
    pending and rejected training plans are listed and the user is asked, on
    the command line, to pick one; the question is repeated until a valid
    option is entered.

    Args:
        sort_by_date: whether to sort by last modification date. Defaults to True.
        id: unique id of the training plan to be approved. Providing an id will
            trigger non-interactive approval.
    """

    def approve(training_plan_id):
        tp_security_manager.approve_training_plan(training_plan_id)
        logger.info(f"Training plan {training_plan_id} has been approved. "
                    "Researchers can now train the Training Plan "
                    "on this node.")

    # If id is already provided
    if id:
        return approve(id)

    sort_by = 'date_modified' if sort_by_date else None

    non_approved_training_plans = tp_security_manager.list_training_plans(
        sort_by=sort_by,
        select_status=[TrainingPlanApprovalStatus.PENDING,
                       TrainingPlanApprovalStatus.REJECTED],
        verbose=False)
    if not non_approved_training_plans:
        logger.warning("All training_plans have been approved or no training plan has been registered... aborting")
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] + '\tdate_last_action ' +
               str(m['date_last_action']) for m in non_approved_training_plans]

    msg = "Select the training plan to approve:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are stripped when
            # Python runs with -O, and a negative index (e.g. input "0") would
            # then silently select a plan from the end of the list.
            if opt_idx not in range(len(non_approved_training_plans)):
                raise ValueError(f'Option out of range: {opt_idx + 1}')
            training_plan_id = non_approved_training_plans[opt_idx]['training_plan_id']
            return approve(training_plan_id)
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')

delete_all_database

delete_all_database()

Deletes all datasets from the node's database.

Does not modify the dataset's files.

Source code in fedbiomed/node/cli_utils/_database.py
def delete_all_database():
    """Removes every dataset entry from the node's database.

    Only the database entries are removed: the files of the datasets are
    left untouched. A warning is logged when there is nothing to delete.
    """
    registered_datasets = dataset_manager.list_my_data(verbose=False)

    if registered_datasets:
        # remove the entries one by one, logging each removal
        for entry in registered_datasets:
            dataset_id = entry['dataset_id']
            dataset_manager.remove_database(dataset_id)
            logger.info(f'Dataset removed for dataset_id:{dataset_id!s}')
    else:
        logger.warning('No dataset to delete')

delete_database

delete_database(interactive=True)

Removes one or more dataset from the node's database.

Does not modify the dataset's files.

Parameters:

Name Type Description Default
interactive bool
  • if True interactively queries (repeatedly) from the command line for a dataset to delete
  • if False delete MNIST dataset if it exists in the database
True
Source code in fedbiomed/node/cli_utils/_database.py
def delete_database(interactive: bool = True):
    """Removes a dataset from the node's database.

    Does not modify the dataset's files.

    Args:
        interactive:

            - if `True` interactively queries from the command line for a
                dataset to delete (the question is repeated until a valid
                option is entered)
            - if `False` delete MNIST dataset if it exists in the database
    """

    def remove(dataset_id):
        dataset_manager.remove_database(dataset_id)
        logger.info('Dataset removed. Here your available datasets')
        dataset_manager.list_my_data()

    my_data = dataset_manager.list_my_data(verbose=False)
    if not my_data:
        logger.warning('No dataset to delete')
        return

    if interactive is not True:
        # Non-interactive mode: there is no user to re-prompt, so this is done
        # once, outside of the retry loop. `None` is used when no dataset is
        # named MNIST, so that the case is reported instead of failing on an
        # unbound variable.
        d_id = next((ds['dataset_id'] for ds in my_data if ds['name'] == 'MNIST'), None)
        if not d_id:
            logger.warning('No matching dataset to delete')
            return
        remove(d_id)
        return

    options = [d['name'] for d in my_data]
    msg = "Select the dataset to delete:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are stripped when
            # Python runs with -O, and a negative index (e.g. input "0") would
            # then silently select a dataset from the end of the list.
            if opt_idx not in range(len(my_data)):
                raise ValueError(f'Option out of range: {opt_idx + 1}')

            d_id = my_data[opt_idx]['dataset_id']
            if not d_id:
                logger.warning('No matching dataset to delete')
                return
            remove(d_id)
            return
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')

delete_training_plan

delete_training_plan(id=None)

Deletes an authorized training plan in the database interactively from the CLI.

Does not modify or delete training plan file.

Deletes only registered and requested training_plans. For default training plans, files should be removed directly from the file system.

Parameters:

Name Type Description Default
id Optional[str]

Unique id of the training plan that will be removed. Providing an id will trigger non-interactive deletion.

None
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def delete_training_plan(id: Optional[str] = None):
    """Deletes an authorized training plan in the database interactively from the CLI.

    Does not modify or delete training plan file.

    Deletes only registered and requested training_plans. For default training plans, files
    should be removed directly from the file system.

    Nothing is deleted, and a warning is logged, when the database holds no
    registered or requested training plan, even if `id` is given.

    Args:
        id: unique id of the training plan that will be removed. Providing an id
            skips the interactive selection.
    """

    def delete(training_plan_id):
        # Delete training plan
        tp_security_manager.delete_training_plan(training_plan_id)
        logger.info('Training plan has been removed. Here your other training plans')
        tp_security_manager.list_training_plans(verbose=True)

    training_plans = tp_security_manager.list_training_plans(verbose=False)
    deletable_types = [TrainingPlanStatus.REGISTERED.value,
                       TrainingPlanStatus.REQUESTED.value]
    training_plans = [m for m in training_plans if m['training_plan_type'] in deletable_types]
    if not training_plans:
        logger.warning('No training plans to delete')
        return

    if id:
        return delete(id)

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t Training plan type ' +
               m['training_plan_type'] + '\tTraining plan status ' + m['training_plan_status'] for m in training_plans]
    msg = "Select the training plan to delete:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are stripped when
            # Python runs with -O, and a negative index (e.g. input "0") would
            # then silently select a plan from the end of the list.
            if opt_idx not in range(len(training_plans)):
                raise ValueError(f'Option out of range: {opt_idx + 1}')
            training_plan_id = training_plans[opt_idx]['training_plan_id']

            if not training_plan_id:
                logger.warning('No matching training plan to delete')
                return
            return delete(training_plan_id)

        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')

register_training_plan

register_training_plan()

Registers an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def register_training_plan():
    """Interactively registers an authorized training plan in the database.

    The name, the description and the path of the training plan file are asked
    to the user through the CLI. The training plan file itself is not modified.
    """

    print('Welcome to the Fed-BioMed CLI data manager')

    # Collect everything needed for the registration from the user;
    # training plan files saved as txt are allowed
    registration = {
        'name': input('Please enter a training plan name: '),
        'description': input('Please enter a description for the training plan: '),
        'path': validated_path_input(type="txt"),
    }

    try:
        tp_security_manager.register_training_plan(**registration)
    except AssertionError as error:
        # Report the problem in a dialog box, or on the console if that fails
        # with ModuleNotFoundError, then stop with an error status
        try:
            tkinter.messagebox.showwarning(title='Warning', message=str(error))
        except ModuleNotFoundError:
            warnings.warn(f'[ERROR]: {error}')
        exit(1)

    print('\nGreat! Take a look at your data:')
    tp_security_manager.list_training_plans(verbose=True)

reject_training_plan

reject_training_plan(id=None, notes=None)

Rejects a given training plan that has either Pending or Approved status

Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def reject_training_plan(id: Optional[str] = None, notes: Optional[str] = None):
    """Rejects a given training plan that has either Pending or Approved status

    When `id` is given, the training plan is rejected directly with `notes`.
    Otherwise the approved and pending training plans are listed and the user
    is asked, on the command line, to pick one and to give a note; the question
    is repeated until a valid option is entered.

    Nothing is rejected, and a warning is logged, when no training plan is
    approved or pending, even if `id` is given.

    Args:
        id: unique id of the training plan to be rejected. Providing an id
            skips the interactive selection.
        notes: note explaining why the training plan is rejected. Only used
            when `id` is given; it is asked to the user otherwise.
    """

    def reject(training_plan_id, notes):
        tp_security_manager.reject_training_plan(training_plan_id, notes)
        logger.info(f"Training plan {training_plan_id} has been rejected. "
                     "Researchers can not train training plan "
                     "on this node anymore")

    approved_training_plans = tp_security_manager.list_training_plans(
        select_status=[TrainingPlanApprovalStatus.APPROVED,
                       TrainingPlanApprovalStatus.PENDING],
        verbose=False)

    if not approved_training_plans:
        logger.warning("All training plans have already been rejected or no training plan has been registered... aborting")
        return

    if id:
        reject(id, notes)
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] + '\tTraining plan Type ' + m['training_plan_type'] for m in approved_training_plans]

    msg = "Select the training plan to reject (this will prevent Researcher to run training plan on Node):\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are stripped when
            # Python runs with -O, and a negative index (e.g. input "0") would
            # then silently select a plan from the end of the list.
            if opt_idx not in range(len(approved_training_plans)):
                raise ValueError(f'Option out of range: {opt_idx + 1}')
            training_plan_id = approved_training_plans[opt_idx]['training_plan_id']
            notes = input("Please give a note to explain why training plan has been rejected: \n")
            reject(training_plan_id, notes)
            return

        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')

update_training_plan

update_training_plan()

Updates an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

User can either choose different training plan file (different path) to update training plan or same training plan file.

Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def update_training_plan():
    """Updates an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    Only training plans of registered type can be updated. The user picks one
    of them on the command line, then chooses either a different training plan
    file (different path) or the same training plan file with updated content.
    The question is repeated until a valid option is entered.
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)

    # Select only registered training plan to update
    training_plans = [m for m in training_plans if m['training_plan_type'] == TrainingPlanStatus.REGISTERED.value]
    if not training_plans:
        logger.warning('No registered training plans has been found to update')
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] for m in training_plans]
    msg = "Select the training plan to update:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:

            # Get the selection
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are stripped when
            # Python runs with -O, and a negative index (e.g. input "0") would
            # then silently select a plan from the end of the list.
            if opt_idx not in range(len(training_plans)):
                raise ValueError(f'Option out of range: {opt_idx + 1}')
            training_plan_id = training_plans[opt_idx]['training_plan_id']

            if not training_plan_id:
                logger.warning('No matching training plan to update')
                return

            # Get the new file or same file.  User can provide same training plan file
            # with updated content or new training plan file.
            path = validated_path_input(type="txt")

            # Update training plan through training plan manager
            tp_security_manager.update_training_plan_hash(training_plan_id, path)

            logger.info('Training plan has been updated. Here all your training plans')
            tp_security_manager.list_training_plans(verbose=True)

            return

        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')

view_training_plan

view_training_plan()

Views source code for a training plan in the database

If environ[EDITOR] is set then use this editor to view a copy of the training plan source code, so that any modification are not saved to the training plan,

If environ[EDITOR] is unset or cannot be used to view the training plan, then print the training plan to the logger.

If training plan cannot be displayed to the logger, then abort.

Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def view_training_plan():
    """Views source code for a training plan in the database

    The user picks a training plan on the command line (the question is
    repeated until a valid option is entered), then the syntax-highlighted
    source code of the training plan is printed to the logger.

    This implementation does not open an external editor: the training plan
    is only displayed, it cannot be modified from here.

    If training plan cannot be displayed to the logger, a critical message is
    logged and the function returns.
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    if not training_plans:
        logger.warning("No training plan has been registered... aborting")
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] for m in training_plans]

    msg = "Select the training plan to view:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\n\nDon't try to modify the training plan with this viewer, modifications will be dropped."
    msg += "\nSelect: "

    # Ask until the user picks one of the listed options
    while True:
        try:
            opt_idx = int(input(msg)) - 1
        except ValueError:
            logger.error('Invalid option. Please, try again.')
            continue
        # Explicit check rather than `assert`: assertions are stripped when
        # Python runs with -O, and a negative index (e.g. input "0") would
        # then silently select a plan from the end of the list.
        if opt_idx not in range(len(training_plans)):
            logger.error('Invalid option. Please, try again.')
            continue
        break

    training_plan_name = training_plans[opt_idx]['name']

    # TODO: more robust (when refactor whole CLI)
    # - check `training_plan` though it should never be None, as we just checked for it
    # - check after file copy though it should work
    # - etc.
    training_plan = tp_security_manager.get_training_plan_by_name(training_plan_name)

    try:
        training_plan_source = highlight(training_plan["training_plan"], PythonLexer(), Terminal256Formatter())
        logger.info(f'\n\n{training_plan_source}\n\n')
    except Exception as err:
        # best-effort display: any failure is reported, never raised
        logger.critical(f'Cannot display training plan via logger. Aborting. Error message is: {err}')

    return