CLI Utils - Fed-BioMed

Simplifies imports from `fedbiomed.node.cli_utils`.

Functions

add_database

add_database(dataset_manager, interactive=True, path=None, name=None, tags=None, description=None, data_type=None, dataset_parameters=None)

Adds a dataset to the node database.

Also queries interactively the user on the command line (and file browser) for dataset parameters if needed.

Parameters:

Name	Type	Description	Default
`dataset_manager`	`DatasetManager`	Object for managing the dataset	required
`interactive`	`bool`	Whether to query interactively for dataset parameters even if they are all passed as arguments. Defaults to `True`.	`True`
`path`	`str`	Path to the dataset.	`None`
`name`	`str`	Keyword for the dataset.	`None`
`tags`	`str`	Comma separated list of tags for the dataset.	`None`
`description`	`str`	Human readable description of the dataset.	`None`
`data_type`	`str`	Keyword for the data type of the dataset.	`None`
`dataset_parameters`	`dict`	Parameters for the dataset manager	`None`

Source code in fedbiomed/node/cli_utils/_database.py

def add_database(
    dataset_manager: DatasetManager,
    interactive: bool = True,
    path: str = None,
    name: str = None,
    tags: str = None,
    description: str = None,
    data_type: str = None,
    dataset_parameters: dict = None,
) -> None:
    """Adds a dataset to the node database.

    Also queries interactively the user on the command line (and file browser)
    for dataset parameters if needed.

    Args:
        dataset_manager: Object for managing the dataset
        interactive: Whether to query interactively for dataset parameters
            even if they are all passed as arguments. Defaults to `True`.
        path: Path to the dataset.
        name: Keyword for the dataset.
        tags: Comma separated list of tags for the dataset.
        description: Human readable description of the dataset.
        data_type: Keyword for the data type of the dataset.
        dataset_parameters: Parameters for the dataset manager

    Raises:
        FedbiomedDatasetError: if the path typed in for a `custom` dataset
            does not exist.
        SystemExit: with exit code 1, if the dataset manager rejects the
            dataset with an `AssertionError` or a
            `FedbiomedDatasetManagerError`.
    """
    data_loading_plan = None

    # if all args are provided, just try to load the data
    # if not, ask the user more information
    need_interactive_input = interactive is True or any(
        arg is None for arg in (path, name, tags, description, data_type)
    )

    if need_interactive_input:
        # Interactive mode: collect dataset parameters from user
        print("Welcome to the Fed-BioMed CLI data manager")

        # With incomplete arguments and no interactive session, the function
        # falls back to the `default` data type (the predefined MNIST entry).
        data_type = validated_data_type_input() if interactive is True else "default"

        # Predefined datasets: data type -> (name, description)
        predefined = {
            "default": ("MNIST", "MNIST database"),
            "mednist": ("MEDNIST", "MEDNIST dataset"),
        }

        if data_type in predefined:
            name, description = predefined[data_type]
            tags = [f"#{name}", "#dataset"]
            if interactive is True:
                tags = _confirm_predefined_dataset_tags(name, tags)
                path = validated_path_input(data_type)

        # Handle custom datasets
        else:
            # Collect dataset metadata
            name = input("Name of the database: ")
            tags = input("Tags (separate them by comma and no spaces): ")
            tags = tags.replace(" ", "").split(",")
            description = input("Description: ")

            if data_type == "medical-folder":
                path, dataset_parameters, data_loading_plan = (
                    add_medical_folder_dataset_from_cli(
                        dataset_parameters, data_loading_plan
                    )
                )

            elif data_type == "custom":
                path = Path(input("Path to the dataset: ")).resolve()
                # Existence check
                if not path.exists():
                    raise FedbiomedDatasetError(f"Path not found: {path}")

            else:
                path = validated_path_input(data_type)

        # if a data loading plan was specified, we now ask for the description
        if interactive is True and data_loading_plan is not None:
            while True:
                desc = input(
                    "Please input a short name/description for your data loading plan:"
                )
                if len(desc) >= 4:
                    break
                print("Description must be at least 4 characters long.")
            data_loading_plan.desc = desc

    else:
        # Non-interactive mode:
        # all data have been provided at call
        # check few things

        # transform a string with comma(s) as a string list
        tags = str(tags).split(",")
        name = str(name)
        description = str(description)

        # Validate data type: unknown keywords silently fall back to "default"
        data_type = str(data_type).lower()
        if data_type not in (
            "csv",
            "default",
            "mednist",
            "images",
            "medical-folder",
            "custom",
        ):
            data_type = "default"

        # Validate path. The f-string keeps the message identical for `str`
        # paths while also accepting path-like objects (string concatenation
        # would raise TypeError for those).
        if not os.path.exists(path):
            logger.critical(f"provided path does not exists: {path}")

    # Ensure path is absolute
    # NOTE(review): `path` is still None here when `interactive` is False and
    # no path was passed; `os.path.abspath` then raises TypeError.
    path = os.path.abspath(path)
    logger.info(f"Dataset absolute path: {path}")

    try:
        dataset_manager.add_database(
            name=name,
            tags=tags,
            data_type=data_type,
            description=description,
            path=path,
            dataset_parameters=dataset_parameters,
            data_loading_plan=data_loading_plan,
        )
    except (AssertionError, FedbiomedDatasetManagerError) as e:
        if interactive is True and messagebox is not None:
            messagebox.showwarning(title="Warning", message=str(e))
        else:
            warnings.warn(f"[ERROR]: {e}", stacklevel=1)
        exit(1)
    except FedbiomedDatasetError as err:
        warnings.warn(
            f"[ERROR]: {err} ... Aborting"
            "\nHint: are you sure you have selected the correct index in Demographic file?",
            stacklevel=1,
        )
        # The dataset was not added: stop here instead of falling through to
        # the success message below.
        return

    # Display success message
    print("\nGreat! Take a look at your data:")
    dataset_manager.list_my_datasets(verbose=True)

approve_training_plan

approve_training_plan(tp_security_manager, id=None, *, sort_by_date=True)

Approves a given training plan that has either Pending or Rejected status

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required
`id`	`Optional[str]`	unique id of the training plan to be approved. Providing an id will trigger non-interactive approval.	`None`
`sort_by_date`	`bool`	whether to sort by last modification date. Defaults to True.	`True`

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def approve_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    *,
    sort_by_date: bool = True,
) -> None:
    """Approves a given training plan that has either Pending or Rejected status

    When `id` is provided the training plan is approved directly. Otherwise the
    pending and rejected training plans are listed and the user is prompted on
    the command line until a valid entry is selected.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: unique id of the training plan to be approved. Providing an id will
            trigger non-interactive approval.
        sort_by_date: whether to sort by last modification date. Defaults to True.
    """

    def approve(training_plan_id):
        tp_security_manager.approve_training_plan(training_plan_id)
        logger.info(
            f"Training plan {training_plan_id} has been approved. "
            "Researchers can now train the Training Plan "
            "on this node."
        )

    # If id is already provided
    if id:
        return approve(id)

    sort_by = "date_modified" if sort_by_date else None

    non_approved_training_plans = tp_security_manager.list_training_plans(
        sort_by=sort_by,
        select_status=[
            TrainingPlanApprovalStatus.PENDING,
            TrainingPlanApprovalStatus.REJECTED,
        ],
        verbose=False,
    )
    if not non_approved_training_plans:
        logger.warning(
            "All training_plans have been approved or no training plan has been registered... aborting"
        )
        return

    options = [
        f'{m["name"]}\t Training plan ID {m["training_plan_id"]}'
        f'\t training plan status {m["training_plan_status"]}'
        f'\tdate_last_action {m["date_last_action"]}'
        for m in non_approved_training_plans
    ]

    msg = "Select the training plan to approve:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
        except ValueError:
            # Non-numeric input is handled like an out-of-range choice.
            opt_idx = -1

        # Explicit bounds check instead of `assert`: assertions are removed
        # under `python -O`, and an entry of 0 (index -1) would then silently
        # select the last training plan of the list.
        if 0 <= opt_idx < len(non_approved_training_plans):
            break
        logger.error("Invalid option. Please, try again.")

    # Performed outside of the input-validation loop so that an error raised
    # by the manager is not reported as an invalid menu choice.
    return approve(non_approved_training_plans[opt_idx]["training_plan_id"])

delete_all_database

delete_all_database(dataset_manager)

Deletes all datasets from the node's database.

Does not modify the dataset's files.

Parameters:

Name	Type	Description	Default
`dataset_manager`	`DatasetManager`	Object for managing the dataset	required

Source code in fedbiomed/node/cli_utils/_database.py

def delete_all_database(dataset_manager: DatasetManager) -> None:
    """Removes every dataset entry from the node's database.

    Only the database entries are removed; the dataset files themselves are
    left untouched.

    Args:
        dataset_manager: Object for managing the dataset
    """
    registered = dataset_manager.list_my_datasets(verbose=False)

    if registered:
        # Drop the entries one by one, logging each removed id.
        for entry in registered:
            dataset_id = entry["dataset_id"]
            dataset_manager.dataset_table.delete_by_id(dataset_id)
            logger.info(f"Dataset removed for dataset_id:{dataset_id!s}")
    else:
        logger.warning("No dataset to delete")

delete_database

delete_database(dataset_manager, interactive=True)

Removes one or more dataset from the node's database.

Does not modify the dataset's files.

Parameters:

Name	Type	Description	Default
`dataset_manager`	`DatasetManager`	Object for managing the dataset	required
`interactive`	`bool`	If `True`, interactively queries (repeatedly) from the command line for a dataset to delete; if `False`, deletes the MNIST dataset if it exists in the database.	`True`

Source code in fedbiomed/node/cli_utils/_database.py

def delete_database(dataset_manager: DatasetManager, interactive: bool = True) -> None:
    """Removes one or more dataset from the node's database.

    Does not modify the dataset's files.

    Args:
        dataset_manager: Object for managing the dataset
        interactive:

            - if `True` interactively queries (repeatedly) from the command line
                for a dataset to delete
            - if `False` delete MNIST dataset if it exists in the database
    """
    my_data = dataset_manager.list_my_datasets(verbose=False)
    if not my_data:
        logger.warning("No dataset to delete")
        return

    d_id: Union[str, None] = None

    if interactive is True:
        options = [d["name"] for d in my_data]
        msg = "Select the dataset to delete:\n"
        msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
        msg += "\nSelect: "

        while True:
            try:
                opt_idx = int(input(msg)) - 1
            except ValueError:
                # Non-numeric input is handled like an out-of-range choice.
                opt_idx = -1

            # Explicit bounds check instead of `assert`: assertions are
            # removed under `python -O`, and an entry of 0 (index -1) would
            # then silently delete the last dataset of the list.
            if 0 <= opt_idx < len(my_data):
                break
            logger.error("Invalid option. Please, try again.")

        d_id = my_data[opt_idx]["dataset_id"]
    else:
        # Non-interactive mode only targets the first dataset named "MNIST".
        d_id = next(
            (ds["dataset_id"] for ds in my_data if ds["name"] == "MNIST"), None
        )

    if not d_id:
        logger.warning("No matching dataset to delete")
        return

    dataset_manager.dataset_table.delete_by_id(d_id)
    logger.info("Dataset removed. Here your available datasets")
    dataset_manager.list_my_datasets()

delete_training_plan

delete_training_plan(tp_security_manager, id=None)

Deletes an authorized training plan in the database interactively from the CLI.

Does not modify or delete training plan file.

Deletes only registered and requested training_plans. For default training plans, files should be removed directly from the file system.

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required
`id`	`Optional[str]`	ID of the training plan that will be removed.	`None`

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def delete_training_plan(
    tp_security_manager: TrainingPlanSecurityManager, id: Optional[str] = None
) -> None:
    """Deletes an authorized training plan in the database interactively from the CLI.

    Does not modify or delete training plan file.

    Deletes only registered and requested training_plans. For default training plans, files
    should be removed directly from the file system.

    When `id` is provided no prompt is shown and that id is handed to the
    security manager directly.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: ID of the training plan that will be removed.
    """

    def delete(training_plan_id):
        # Delete training plan
        tp_security_manager.delete_training_plan(training_plan_id)
        logger.info("Training plan has been removed. Here your other training plans")
        tp_security_manager.list_training_plans(verbose=True)

    # Only registered and requested training plans are offered for deletion
    deletable_types = [
        TrainingPlanStatus.REGISTERED.value,
        TrainingPlanStatus.REQUESTED.value,
    ]
    training_plans = [
        m
        for m in tp_security_manager.list_training_plans(verbose=False)
        if m["training_plan_type"] in deletable_types
    ]
    if not training_plans:
        logger.warning("No training plans to delete")
        return

    # NOTE(review): a provided `id` is passed to the manager as-is; it is not
    # checked against the filtered list built above.
    if id:
        return delete(id)

    options = [
        f'{m["name"]}\t Training plan ID {m["training_plan_id"]}'
        f'\t Training plan type {m["training_plan_type"]}'
        f'\tTraining plan status {m["training_plan_status"]}'
        for m in training_plans
    ]
    msg = "Select the training plan to delete:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
        except ValueError:
            # Non-numeric input is handled like an out-of-range choice.
            opt_idx = -1

        # Explicit bounds check instead of `assert`: assertions are removed
        # under `python -O`, and an entry of 0 (index -1) would then silently
        # delete the last training plan of the list.
        if 0 <= opt_idx < len(training_plans):
            break
        logger.error("Invalid option. Please, try again.")

    training_plan_id = training_plans[opt_idx]["training_plan_id"]
    if not training_plan_id:
        logger.warning("No matching training plan to delete")
        return

    # Performed outside of the input-validation loop so that an error raised
    # by the manager is not reported as an invalid menu choice.
    return delete(training_plan_id)

register_training_plan

register_training_plan(tp_security_manager)

Registers an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def register_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Interactively registers an authorized training plan in the database.

    The user is asked on the command line for a name, a description and the
    training plan file. The training plan file itself is not modified.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """

    print("Welcome to the Fed-BioMed CLI data manager")

    # Entries are evaluated in order: name, description, then the file path
    # (files saved as txt are allowed).
    registration = {
        "name": input("Please enter a training plan name: "),
        "description": input("Please enter a description for the training plan: "),
        "path": validated_path_input(type="txt"),
    }

    try:
        tp_security_manager.register_training_plan(**registration)
    except AssertionError as failure:
        # Report through the message box when one is available, otherwise
        # through a warning, then exit with status 1.
        if messagebox is None:
            warnings.warn(f"[ERROR]: {failure}", stacklevel=1)
        else:
            messagebox.showwarning(title="Warning", message=str(failure))
        exit(1)

    print("\nGreat! Take a look at your data:")
    tp_security_manager.list_training_plans(verbose=True)

reject_training_plan

reject_training_plan(tp_security_manager, id=None, notes=None)

Rejects a given training plan that has either Pending or Approved status

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required
`id`	`Optional[str]`	Training plan ID	`None`
`notes`	`Optional[str]`	Comment about rejection reason	`None`

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def reject_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    notes: Optional[str] = None,
) -> None:
    """Rejects a given training plan that has either Pending or Approved status

    When `id` is provided the training plan is rejected directly with the given
    `notes`. Otherwise the approved and pending training plans are listed and
    the user is prompted for a selection and for a rejection note.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: Training plan ID
        notes: Comment about rejection reason
    """

    def reject(training_plan_id, notes):
        tp_security_manager.reject_training_plan(training_plan_id, notes)
        logger.info(
            f"Training plan {training_plan_id} has been rejected. "
            "Researchers can not train training plan "
            "on this node anymore"
        )

    approved_training_plans = tp_security_manager.list_training_plans(
        select_status=[
            TrainingPlanApprovalStatus.APPROVED,
            TrainingPlanApprovalStatus.PENDING,
        ],
        verbose=False,
    )

    if not approved_training_plans:
        logger.warning(
            "All training plans have already been rejected or no training plan has been registered... aborting"
        )
        return

    if id:
        reject(id, notes)
        return

    options = [
        f'{m["name"]}\t Training plan ID {m["training_plan_id"]}'
        f'\t training plan status {m["training_plan_status"]}'
        f'\tTraining plan Type {m["training_plan_type"]}'
        for m in approved_training_plans
    ]

    msg = "Select the training plan to reject (this will prevent Researcher to run training plan on Node):\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
        except ValueError:
            # Non-numeric input is handled like an out-of-range choice.
            opt_idx = -1

        # Explicit bounds check instead of `assert`: assertions are removed
        # under `python -O`, and an entry of 0 (index -1) would then silently
        # reject the last training plan of the list.
        if 0 <= opt_idx < len(approved_training_plans):
            break
        logger.error("Invalid option. Please, try again.")

    training_plan_id = approved_training_plans[opt_idx]["training_plan_id"]
    notes = input(
        "Please give a note to explain why training plan has been rejected: \n"
    )
    # Performed outside of the input-validation loop so that an error raised
    # by the manager is not reported as an invalid menu choice.
    reject(training_plan_id, notes)

update_training_plan

update_training_plan(tp_security_manager)

Updates an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

User can either choose different training plan file (different path) to update training plan or same training plan file.

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def update_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Updates an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    User can either choose different training plan file (different path)
    to update training plan or same training plan file.

    Only training plans of the registered type are offered for update.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    # Select only registered training plan to update
    training_plans = [
        m
        for m in tp_security_manager.list_training_plans(verbose=False)
        if m["training_plan_type"] == TrainingPlanStatus.REGISTERED.value
    ]
    if not training_plans:
        logger.warning("No registered training plans has been found to update")
        return

    options = [
        f'{m["name"]}\t Training plan ID {m["training_plan_id"]}'
        for m in training_plans
    ]
    msg = "Select the training plan to update:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # Get the selection
            opt_idx = int(input(msg)) - 1
        except ValueError:
            # Non-numeric input is handled like an out-of-range choice.
            opt_idx = -1

        # Explicit bounds check instead of `assert`: assertions are removed
        # under `python -O`, and an entry of 0 (index -1) would then silently
        # select the last training plan of the list.
        if 0 <= opt_idx < len(training_plans):
            break
        logger.error("Invalid option. Please, try again.")

    training_plan_id = training_plans[opt_idx]["training_plan_id"]
    if not training_plan_id:
        logger.warning("No matching training plan to update")
        return

    # Get the new file or same file.  User can provide same training plan file
    # with updated content or new training plan file.
    # Done outside of the input-validation loop so that an error raised while
    # choosing the file or updating the hash is not reported as an invalid
    # menu choice.
    path = validated_path_input(type="txt")

    # Update training plan through training plan manager
    tp_security_manager.update_training_plan_hash(training_plan_id, path)

    logger.info("Training plan has been updated. Here all your training plans")
    tp_security_manager.list_training_plans(verbose=True)

view_training_plan

view_training_plan(tp_security_manager)

Views source code for a training plan in the database

If training plan cannot be displayed to the logger, then abort.

Parameters:

Name	Type	Description	Default
`tp_security_manager`	`TrainingPlanSecurityManager`	Object for managing the training plan approval	required

Source code in fedbiomed/node/cli_utils/_training_plan_management.py

def view_training_plan(tp_security_manager: TrainingPlanSecurityManager) -> None:
    """Views source code for a training plan in the database

    The user selects a training plan on the command line; its source is then
    syntax-highlighted and sent to the logger.

    If training plan cannot be displayed to the logger, then abort.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    if not training_plans:
        logger.warning("No training plan has been registered... aborting")
        return

    options = [
        f'{m["name"]}\t Training plan ID {m["training_plan_id"]}'
        f'\t training plan status {m["training_plan_status"]}'
        for m in training_plans
    ]

    msg = "Select the training plan to view:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\n\nDon't try to modify the training plan with this viewer, modifications will be dropped."
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
        except ValueError:
            # Non-numeric input is handled like an out-of-range choice.
            opt_idx = -1

        # Explicit bounds check instead of `assert`: assertions are removed
        # under `python -O`, and an entry of 0 (index -1) would then silently
        # display the last training plan of the list.
        if 0 <= opt_idx < len(training_plans):
            break
        logger.error("Invalid option. Please, try again.")

    training_plan_name = training_plans[opt_idx]["name"]

    # TODO: more robust (when refactor whole CLI)
    # - check `training_plan` though it should never be None, as we just checked for it
    # - check after file copy though it should work
    # - etc.
    training_plan = tp_security_manager.get_training_plan_by_name(
        training_plan_name
    )

    try:
        training_plan_source = highlight(
            training_plan["training_plan"], PythonLexer(), Terminal256Formatter()
        )
        logger.info(f"\n\n{training_plan_source}\n\n")
    except Exception as err:
        # Best effort: any failure to render the source is logged, not raised.
        logger.critical(
            f"Cannot display training plan via logger. Aborting. Error message is: {err}"
        )