CLI Utils

This module exists to simplify imports from fedbiomed.node.cli_utils.

Functions

add_database

add_database(dataset_manager, interactive=True, path=None, name=None, tags=None, description=None, data_type=None, dataset_parameters=None)

Adds a dataset to the node database.

Also interactively queries the user on the command line (and file browser) for dataset parameters if needed.

Parameters:

Name Type Description Default
dataset_manager DatasetManager

Object for managing the dataset

required
interactive bool

Whether to query interactively for dataset parameters even if they are all passed as arguments. Defaults to True.

True
path Optional[str]

Path to the dataset.

None
name Optional[str]

Keyword for the dataset.

None
tags Optional[Union[str, List[str]]]

Comma separated list of tags for the dataset.

None
description Optional[str]

Human readable description of the dataset.

None
data_type Optional[str]

Keyword for the data type of the dataset.

None
dataset_parameters Optional[dict]

Parameters for the dataset manager

None
Source code in fedbiomed/node/cli_utils/_database.py
def add_database(
    dataset_manager: DatasetManager,
    interactive: bool = True,
    path: Optional[str] = None,
    name: Optional[str] = None,
    tags: Optional[Union[str, List[str]]] = None,
    description: Optional[str] = None,
    data_type: Optional[str] = None,
    dataset_parameters: Optional[dict] = None,
) -> None:
    """Adds a dataset to the node database.

    Also interactively queries the user on the command line (and file browser)
    for dataset parameters if needed.

    When `interactive` is `False` but at least one of `path`, `name`, `tags`,
    `description` or `data_type` is missing, the predefined `default` (MNIST)
    entry is used: `name`, `description` and `tags` are overwritten with the
    predefined values and only `path` is kept from the arguments.

    If the dataset manager raises `AssertionError`,
    `FedbiomedDatasetManagerError` or `FedbiomedDatasetError` while adding the
    dataset, the error is reported and the process exits with status 1.

    Args:
        dataset_manager: Object for managing the dataset
        interactive: Whether to query interactively for dataset parameters
            even if they are all passed as arguments. Defaults to `True`.
        path: Path to the dataset.
        name: Keyword for the dataset.
        tags: Comma separated list of tags for the dataset, or a list of tags.
        description: Human readable description of the dataset.
        data_type: Keyword for the data type of the dataset.
        dataset_parameters: Parameters for the dataset manager

    Raises:
        FedbiomedDatasetManagerError: if the arguments given in
            non-interactive mode are invalid (no tags, or a tag, the name or
            the description shorter than 3 characters), or if no dataset path
            is set.
    """
    data_loading_plan = None

    # Any missing argument forces the "interactive input" branch, even when
    # `interactive` itself is False.
    need_interactive_input = (
        interactive
        or path is None
        or name is None
        or tags is None
        or description is None
        or data_type is None
    )

    if need_interactive_input:
        print("Welcome to the Fed-BioMed CLI data manager")

        # Without a user to ask, fall back to the predefined "default" entry.
        data_type = validated_data_type_input() if interactive else "default"

        # data type -> (name, description, tags) of the predefined datasets
        _predefined = {
            "default": ("MNIST", "MNIST database", ["#MNIST", "#dataset"]),
            "mednist": ("MEDNIST", "MEDNIST dataset", ["#MEDNIST", "#dataset"]),
        }

        if data_type in _predefined:
            name, description, tags = _predefined[data_type]
            if interactive:
                tags = _confirm_predefined_dataset_tags(name, tags)
                path = validated_path_input(data_type)

        else:
            # Keep asking until each free-text field passes its length check.
            while True:
                name = input("Name of the database: ")
                if len(name) >= 3:
                    break
                print("Name must be at least 3 characters long.")
            while True:
                tags = (
                    input("Tags (separate them by comma and no spaces): ")
                    .replace(" ", "")
                    .split(",")
                )
                if len(tags) >= 1 and all(len(tag) >= 3 for tag in tags):
                    break
                print(
                    "Please enter at least one tag, and ensure all tags are at least 3 characters long."
                )
            while True:
                description = input("Description: ")
                if len(description) >= 3:
                    break
                print("Description must be at least 3 characters long.")

            if data_type == "medical-folder":
                path, dataset_parameters, data_loading_plan = (
                    add_medical_folder_dataset_from_cli(
                        dataset_parameters, data_loading_plan
                    )
                )
            elif data_type == "custom":
                while True:
                    abs_path = Path(input("Path to the dataset: ")).resolve()
                    if abs_path.exists():
                        break
                    print(f"Path not found: {abs_path}. Please try again.")
                path = str(abs_path)
            else:
                path = validated_path_input(data_type)

        if interactive and data_loading_plan is not None:
            while True:
                desc = input(
                    "Please input a short name/description for your data loading plan:"
                )
                if len(desc) >= 4:
                    break
                print("Description must be at least 4 characters long.")
            data_loading_plan.desc = desc

    else:
        # All parameters were passed as arguments: normalize and validate them.
        # `tags` may already be a list of tags; only a string needs splitting
        # (calling `str()` on a list would split its repr into garbage tags).
        if isinstance(tags, (list, tuple)):
            tags = [str(tag) for tag in tags]
        else:
            tags = str(tags).split(",")
        name = str(name)
        description = str(description)

        if (
            not tags
            or not all(len(tag) >= 3 for tag in tags)
            or len(name) < 3
            or len(description) < 3
        ):
            raise FedbiomedDatasetManagerError(
                "Invalid dataset parameters. Please ensure that tags are not empty, "
                "and tags, name and description are at least 3 characters long."
            )

        # Unknown data types silently fall back to "default".
        data_type = str(data_type).lower()
        if data_type not in (
            "csv",
            "default",
            "mednist",
            "images",
            "medical-folder",
            "custom",
        ):
            data_type = "default"

        if path is not None and not os.path.exists(path):
            logger.warning("provided path does not exist: " + path)
            path = _prompt_path_cli(data_type)

    if path is None:
        raise FedbiomedDatasetManagerError("Dataset path is not set")
    path = os.path.abspath(path)
    logger.info(f"Dataset absolute path: {path}")

    try:
        dataset_manager.add_database(
            name=name,
            tags=tags,
            data_type=data_type,
            description=description,
            path=path,
            dataset_parameters=dataset_parameters,
            data_loading_plan=data_loading_plan,
        )
    except (AssertionError, FedbiomedDatasetManagerError) as e:
        # Show a dialog only when a user is present and a message box is
        # available; otherwise report through the logger.
        if interactive and messagebox is not None:
            messagebox.showwarning(title="Warning", message=str(e))
        else:
            logger.error(str(e))
        exit(1)
    except FedbiomedDatasetError as err:
        logger.error(
            f"{err} ... Aborting"
            "\nHint: are you sure you have selected the correct index in Demographic file?"
        )
        exit(1)

    print("\nGreat! Take a look at your data:")
    dataset_manager.list_my_datasets(verbose=True)

approve_training_plan

approve_training_plan(tp_security_manager, id=None, *, sort_by_date=True)

Approves a given training plan that has either Pending or Rejected status

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
id Optional[str]

Unique ID of the training plan to be approved. Providing an id will trigger non-interactive approval.

None
sort_by_date bool

whether to sort by last modification date. Defaults to True.

True
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def approve_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    *,
    sort_by_date: bool = True,
) -> None:
    """Approves a given training plan that has either Pending or Rejected status

    If no `id` is given, the Pending/Rejected training plans are listed and
    the user is asked on the command line which one to approve; the prompt
    is repeated until a valid option is entered.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: unique id of the training plan to be approved. Providing an id will trigger non-interactive approval.
        sort_by_date: whether to sort by last modification date. Defaults to True.
    """

    def approve(training_plan_id):
        tp_security_manager.approve_training_plan(training_plan_id)
        logger.info(
            f"Training plan {training_plan_id} has been approved. "
            "Researchers can now train the Training Plan "
            "on this node."
        )

    # If id is already provided
    if id:
        return approve(id)

    sort_by = "date_modified" if sort_by_date else None

    non_approved_training_plans = tp_security_manager.list_training_plans(
        sort_by=sort_by,
        select_status=[
            TrainingPlanApprovalStatus.PENDING,
            TrainingPlanApprovalStatus.REJECTED,
        ],
        verbose=False,
    )
    if not non_approved_training_plans:
        logger.warning(
            "All training_plans have been approved or no training plan has been registered... aborting"
        )
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        + "\tdate_last_action "
        + str(m["date_last_action"])
        for m in non_approved_training_plans
    ]

    msg = "Select the training plan to approve:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # The menu is 1-based, the list is 0-based.
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a plan from the end of the list.
            if opt_idx not in range(len(non_approved_training_plans)):
                raise ValueError(f"option {opt_idx + 1} is out of range")
            training_plan_id = non_approved_training_plans[opt_idx]["training_plan_id"]
            return approve(training_plan_id)
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")

delete_all_database

delete_all_database(dataset_manager)

Deletes all datasets from the node's database.

Does not modify the dataset's files.

Parameters:

Name Type Description Default
dataset_manager DatasetManager

Object for managing the dataset

required
Source code in fedbiomed/node/cli_utils/_database.py
def delete_all_database(dataset_manager: DatasetManager) -> None:
    """Removes every dataset entry from the node's database.

    The dataset files themselves are left untouched. A warning is logged
    when the database holds no dataset.

    Args:
        dataset_manager: Object for managing the dataset
    """
    registered = dataset_manager.list_my_datasets(verbose=False)

    if registered:
        # Drop the entries one by one, logging each removed id.
        for entry in registered:
            dataset_id = entry["dataset_id"]
            dataset_manager.dataset_table.delete_by_id(dataset_id)
            logger.info("Dataset removed for dataset_id:" + str(dataset_id))
    else:
        logger.warning("No dataset to delete")

delete_database

delete_database(dataset_manager, interactive=True)

Removes a dataset from the node's database.

Does not modify the dataset's files.

Parameters:

Name Type Description Default
interactive bool
  • if True interactively queries (repeatedly) from the command line for a dataset to delete
  • if False delete MNIST dataset if it exists in the database
True
Source code in fedbiomed/node/cli_utils/_database.py
def delete_database(dataset_manager: DatasetManager, interactive: bool = True) -> None:
    """Removes a dataset from the node's database.

    Does not modify the dataset's files.

    Args:
        dataset_manager: Object for managing the dataset
        interactive:

            - if `True` interactively queries from the command line for a
                dataset to delete (asking again until a valid option is
                entered)
            - if `False` delete MNIST dataset if it exists in the database
    """
    my_data = dataset_manager.list_my_datasets(verbose=False)
    if not my_data:
        logger.warning("No dataset to delete")
        return

    def remove(dataset_id) -> None:
        # Delete the entry, then show what is left in the database.
        dataset_manager.dataset_table.delete_by_id(dataset_id)
        logger.info("Dataset removed. Here your available datasets")
        dataset_manager.list_my_datasets()

    if not interactive:
        # No user to re-prompt: handled outside the retry loop so that an
        # error raised while deleting cannot be retried forever.
        d_id = next(
            (ds["dataset_id"] for ds in my_data if ds["name"] == "MNIST"), None
        )
        if not d_id:
            logger.warning("No matching dataset to delete")
            return
        remove(d_id)
        return

    options = [d["name"] for d in my_data]
    msg = "Select the dataset to delete:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # The menu is 1-based, the list is 0-based.
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a dataset from the end of the list.
            if opt_idx not in range(len(my_data)):
                raise ValueError(f"option {opt_idx + 1} is out of range")

            d_id = my_data[opt_idx]["dataset_id"]
            if not d_id:
                logger.warning("No matching dataset to delete")
                return
            remove(d_id)
            return
        except (ValueError, AssertionError):
            print("Invalid option. Please try again.")

delete_training_plan

delete_training_plan(tp_security_manager, id=None)

Deletes an authorized training plan in the database interactively from the CLI.

Does not modify or delete training plan file.

Deletes only registered and requested training_plans. For default training plans, files should be removed directly from the file system.

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
id Optional[str]

ID of the training plan that will be removed.

None
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def delete_training_plan(
    tp_security_manager: TrainingPlanSecurityManager, id: Optional[str] = None
) -> None:
    """Deletes an authorized training plan in the database interactively from the CLI.

    Does not modify or delete training plan file.

    Deletes only registered and requested training_plans. For default training plans, files
    should be removed directly from the file system.

    If `id` is given, that training plan is deleted without prompting;
    otherwise the user is asked to pick one from a numbered list, and the
    prompt is repeated until a valid option is entered.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: ID of the training plan that will be removed.
    """

    def delete(training_plan_id):
        # Delete training plan
        tp_security_manager.delete_training_plan(training_plan_id)
        logger.info("Training plan has been removed. Here your other training plans")
        tp_security_manager.list_training_plans(verbose=True)

    # Only registered and requested training plans can be deleted from here.
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    training_plans = [
        m
        for m in training_plans
        if m["training_plan_type"]
        in [TrainingPlanStatus.REGISTERED.value, TrainingPlanStatus.REQUESTED.value]
    ]
    if not training_plans:
        logger.warning("No training plans to delete")
        return

    if id:
        return delete(id)

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t Training plan type "
        + m["training_plan_type"]
        + "\tTraining plan status "
        + m["training_plan_status"]
        for m in training_plans
    ]
    msg = "Select the training plan to delete:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # The menu is 1-based, the list is 0-based.
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a plan from the end of the list.
            if opt_idx not in range(len(training_plans)):
                raise ValueError(f"option {opt_idx + 1} is out of range")
            training_plan_id = training_plans[opt_idx]["training_plan_id"]

            if not training_plan_id:
                logger.warning("No matching training plan to delete")
                return
            return delete(training_plan_id)

        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")

register_training_plan

register_training_plan(tp_security_manager)

Registers an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def register_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Interactively registers an authorized training plan in the database.

    The name, the description and the file path of the training plan are
    requested from the user. The training plan file itself is not modified.
    If the registration raises an `AssertionError`, the error is reported
    (message box when available, Python warning otherwise) and the process
    exits with status 1.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """

    print("Welcome to the Fed-BioMed CLI data manager")
    plan_name = input("Please enter a training plan name: ")
    plan_description = input("Please enter a description for the training plan: ")

    # Training plan files saved as txt are accepted
    plan_path = validated_path_input(type="txt")

    registration = {
        "name": plan_name,
        "description": plan_description,
        "path": plan_path,
    }

    try:
        tp_security_manager.register_training_plan(**registration)
    except AssertionError as error:
        if messagebox is None:
            warnings.warn(f"[ERROR]: {error}", stacklevel=1)
        else:
            messagebox.showwarning(title="Warning", message=str(error))
        exit(1)
    else:
        # Registration succeeded: show the resulting list of training plans
        print("\nGreat! Take a look at your data:")
        tp_security_manager.list_training_plans(verbose=True)

reject_training_plan

reject_training_plan(tp_security_manager, id=None, notes=None)

Rejects a given training plan that has either Pending or Approved status

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
id Optional[str]

Training plan ID

None
notes Optional[str]

Comment about rejection reason

None
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def reject_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    notes: Optional[str] = None,
) -> None:
    """Rejects a given training plan that has either Pending or Approved status

    If `id` is given, that training plan is rejected with `notes` without
    prompting. Otherwise the user is asked to pick a training plan from a
    numbered list and to type a rejection note; the prompt is repeated until
    a valid option is entered.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: Training plan ID
        notes: Comment about rejection reason
    """

    def reject(training_plan_id, notes):
        tp_security_manager.reject_training_plan(training_plan_id, notes)
        logger.info(
            f"Training plan {training_plan_id} has been rejected. "
            "Researchers can not train training plan "
            "on this node anymore"
        )

    # Candidates for rejection: training plans currently Approved or Pending.
    approved_training_plans = tp_security_manager.list_training_plans(
        select_status=[
            TrainingPlanApprovalStatus.APPROVED,
            TrainingPlanApprovalStatus.PENDING,
        ],
        verbose=False,
    )

    if not approved_training_plans:
        logger.warning(
            "All training plans have already been rejected or no training plan has been registered... aborting"
        )
        return

    if id:
        reject(id, notes)
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        + "\tTraining plan Type "
        + m["training_plan_type"]
        for m in approved_training_plans
    ]

    msg = "Select the training plan to reject (this will prevent Researcher to run training plan on Node):\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # The menu is 1-based, the list is 0-based.
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a plan from the end of the list.
            if opt_idx not in range(len(approved_training_plans)):
                raise ValueError(f"option {opt_idx + 1} is out of range")
            training_plan_id = approved_training_plans[opt_idx]["training_plan_id"]
            notes = input(
                "Please give a note to explain why training plan has been rejected: \n"
            )
            reject(training_plan_id, notes)
            return

        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")

update_training_plan

update_training_plan(tp_security_manager)

Updates an authorized training plan in the database interactively through the CLI.

Does not modify training plan file.

The user can update the training plan either with a different training plan file (different path) or with the same training plan file.

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def update_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Updates an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    User can either choose different training plan file (different path)
    to update training plan or same training plan file.

    Only registered training plans are offered for update. The prompt is
    repeated until a valid option is entered.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)

    # Select only registered training plan to update
    training_plans = [
        m
        for m in training_plans
        if m["training_plan_type"] == TrainingPlanStatus.REGISTERED.value
    ]
    if not training_plans:
        logger.warning("No registered training plans has been found to update")
        return

    options = [
        m["name"] + "\t Training plan ID " + m["training_plan_id"]
        for m in training_plans
    ]
    msg = "Select the training plan to update:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # Get the selection (the menu is 1-based, the list is 0-based)
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a plan from the end of the list.
            if opt_idx not in range(len(training_plans)):
                raise ValueError(f"option {opt_idx + 1} is out of range")
            training_plan_id = training_plans[opt_idx]["training_plan_id"]

            if not training_plan_id:
                logger.warning("No matching training plan to update")
                return

            # Get the new file or same file.  User can provide same training plan file
            # with updated content or new training plan file.
            path = validated_path_input(type="txt")

            # Update training plan through training plan manager
            tp_security_manager.update_training_plan_hash(training_plan_id, path)

            logger.info("Training plan has been updated. Here all your training plans")
            tp_security_manager.list_training_plans(verbose=True)

            return

        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")

view_training_plan

view_training_plan(tp_security_manager)

Views source code for a training plan in the database

If training plan cannot be displayed to the logger, then abort.

Parameters:

Name Type Description Default
tp_security_manager TrainingPlanSecurityManager

Object for managing the training plan approval

required
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def view_training_plan(tp_security_manager: TrainingPlanSecurityManager) -> None:
    """Views source code for a training plan in the database

    The user picks a training plan from a numbered list (the prompt is
    repeated until a valid option is entered); its source is then
    syntax-highlighted and written to the logger.

    If training plan cannot be displayed to the logger, then abort.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    if not training_plans:
        logger.warning("No training plan has been registered... aborting")
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        for m in training_plans
    ]

    msg = "Select the training plan to view:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\n\nDon't try to modify the training plan with this viewer, modifications will be dropped."
    msg += "\nSelect: "

    while True:
        try:
            # The menu is 1-based, the list is 0-based.
            opt_idx = int(input(msg)) - 1
            # Explicit check rather than `assert`: assertions are removed
            # under `python -O`, and a negative index would then silently
            # select a plan from the end of the list.
            if opt_idx not in range(len(training_plans)):
                raise ValueError(f"option {opt_idx + 1} is out of range")
            training_plan_name = training_plans[opt_idx]["name"]
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
            continue

        # TODO: more robust (when refactor whole CLI)
        # - check `training_plan` though it should never be None, as we just checked for it
        # - check after file copy though it should work
        # - etc.
        training_plan = tp_security_manager.get_training_plan_by_name(
            training_plan_name
        )

        if training_plan is None:
            logger.critical(
                f"Training plan '{training_plan_name}' not found. Aborting."
            )
            return

        try:
            training_plan_source = highlight(
                training_plan["training_plan"], PythonLexer(), Terminal256Formatter()
            )
            logger.info(f"\n\n{training_plan_source}\n\n")
        except Exception as err:
            # Display is best effort: report the failure and stop.
            logger.critical(
                f"Cannot display training plan via logger. Aborting. Error message is: {err}"
            )

        return