to simplify imports from fedbiomed.node.cli_utils
Functions
add_database
add_database(dataset_manager, interactive=True, path=None, name=None, tags=None, description=None, data_type=None, dataset_parameters=None)
Adds a dataset to the node database.
Also queries interactively the user on the command line (and file browser) for dataset parameters if needed.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset_manager | DatasetManager | Object for managing the dataset | required |
interactive | bool | Whether to query interactively for dataset parameters even if they are all passed as arguments. Defaults to `True`. | True |
path | str | Path to the dataset. | None |
name | str | Keyword for the dataset. | None |
tags | str | Comma separated list of tags for the dataset. | None |
description | str | Human readable description of the dataset. | None |
data_type | str | Keyword for the data type of the dataset. | None |
dataset_parameters | dict | Parameters for the dataset manager | None |
Source code in fedbiomed/node/cli_utils/_database.py
def add_database(
    dataset_manager: DatasetManager,
    interactive: bool = True,
    path: str = None,
    name: str = None,
    tags: str = None,
    description: str = None,
    data_type: str = None,
    dataset_parameters: dict = None,
) -> None:
    """Adds a dataset to the node database.

    Also interactively queries the user on the command line (and file browser)
    for dataset parameters if needed.

    Args:
        dataset_manager: Object for managing the dataset
        interactive: Whether to query interactively for dataset parameters
            even if they are all passed as arguments. Defaults to `True`.
        path: Path to the dataset.
        name: Keyword for the dataset.
        tags: Comma separated list of tags for the dataset.
        description: Human readable description of the dataset.
        data_type: Keyword for the data type of the dataset.
        dataset_parameters: Parameters for the dataset manager

    Raises:
        FedbiomedDatasetError: In the interactive "custom" flow, when the path
            typed by the user does not exist.
        SystemExit: With status 1 when the dataset manager rejects the dataset
            with an `AssertionError` or `FedbiomedDatasetManagerError`.
    """
    data_loading_plan = None

    # If all arguments are provided and `interactive` is off, just try to load
    # the data; otherwise more information is needed.
    need_interactive_input = (
        interactive is True
        or path is None
        or name is None
        or tags is None
        or description is None
        or data_type is None
    )

    if need_interactive_input:
        print("Welcome to the Fed-BioMed CLI data manager")

        if interactive is True:
            data_type = validated_data_type_input()
        else:
            # Arguments are incomplete but prompting is disabled: fall back to
            # the predefined "default" dataset, keeping the `path` passed in.
            data_type = "default"

        if data_type == "default":
            name = "MNIST"
            description = "MNIST database"
            tags = ["#MNIST", "#dataset"]
            if interactive is True:
                tags = _confirm_predefined_dataset_tags(name, tags)
                path = validated_path_input(data_type)
        elif data_type == "mednist":
            name = "MEDNIST"
            description = "MEDNIST dataset"
            tags = ["#MEDNIST", "#dataset"]
            if interactive is True:
                tags = _confirm_predefined_dataset_tags(name, tags)
                path = validated_path_input(data_type)
        else:
            # Any other data type: collect the dataset metadata from the user.
            name = input("Name of the database: ")
            tags = input("Tags (separate them by comma and no spaces): ")
            tags = tags.replace(" ", "").split(",")
            description = input("Description: ")

            if data_type == "medical-folder":
                path, dataset_parameters, data_loading_plan = (
                    add_medical_folder_dataset_from_cli(
                        dataset_parameters, data_loading_plan
                    )
                )
            elif data_type == "custom":
                path = Path(input("Path to the dataset: ")).resolve()
                if not path.exists():
                    raise FedbiomedDatasetError(f"Path not found: {path}")
            else:
                path = validated_path_input(data_type)

        # If a data loading plan was specified, ask for its description and
        # keep asking until it is at least 4 characters long.
        if interactive is True and data_loading_plan is not None:
            prompt = "Please input a short name/description for your data loading plan:"
            desc = input(prompt)
            while len(desc) < 4:
                print("Description must be at least 4 characters long.")
                desc = input(prompt)
            data_loading_plan.desc = desc
    else:
        # Non-interactive mode: all values were provided by the caller, only
        # normalize them.
        # Transform a string with comma(s) into a list of strings.
        tags = str(tags).split(",")
        name = str(name)
        description = str(description)

        # Unknown data types silently fall back to "default".
        data_type = str(data_type).lower()
        if data_type not in [
            "csv",
            "default",
            "mednist",
            "images",
            "medical-folder",
            "custom",
        ]:
            data_type = "default"

        if not os.path.exists(path):
            # f-string rather than `+` so a non-str path cannot raise TypeError
            # while reporting the problem.
            logger.critical(f"provided path does not exists: {path}")

    # Ensure path is absolute
    path = os.path.abspath(path)
    logger.info(f"Dataset absolute path: {path}")

    try:
        dataset_manager.add_database(
            name=name,
            tags=tags,
            data_type=data_type,
            description=description,
            path=path,
            dataset_parameters=dataset_parameters,
            data_loading_plan=data_loading_plan,
        )
    except (AssertionError, FedbiomedDatasetManagerError) as e:
        if interactive is True and messagebox is not None:
            messagebox.showwarning(title="Warning", message=str(e))
        else:
            warnings.warn(f"[ERROR]: {e}", stacklevel=1)
        # The `exit` builtin is only injected by the `site` module and is meant
        # for the interactive shell; raise SystemExit directly instead.
        raise SystemExit(1)
    except FedbiomedDatasetError as err:
        warnings.warn(
            f"[ERROR]: {err} ... Aborting"
            "\nHint: are you sure you have selected the correct index in Demographic file?",
            stacklevel=1,
        )
        # The dataset was not added: do not print the success message below.
        return

    print("\nGreat! Take a look at your data:")
    dataset_manager.list_my_datasets(verbose=True)
approve_training_plan
approve_training_plan(tp_security_manager, id=None, *, sort_by_date=True)
Approves a given training plan that has either Pending or Rejected status
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
id | Optional[str] | Unique id of the training plan to be approved. Providing an id will trigger non-interactive approval. | None |
sort_by_date | bool | whether to sort by last modification date. Defaults to True. | True |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def approve_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    *,
    sort_by_date: bool = True,
) -> None:
    """Approves a given training plan that has either Pending or Rejected status

    When no `id` is given, the Pending/Rejected training plans are listed and
    the user is prompted (repeatedly, until a valid choice is made) to pick one.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: unique id of the training plan to be approved. Providing an id will
            trigger non-interactive approval.
        sort_by_date: whether to sort by last modification date. Defaults to True.
    """

    def approve(training_plan_id):
        tp_security_manager.approve_training_plan(training_plan_id)
        logger.info(
            f"Training plan {training_plan_id} has been approved. "
            "Researchers can now train the Training Plan "
            "on this node."
        )

    # If id is already provided, approve it directly without any prompt
    if id:
        return approve(id)

    sort_by = "date_modified" if sort_by_date else None
    non_approved_training_plans = tp_security_manager.list_training_plans(
        sort_by=sort_by,
        select_status=[
            TrainingPlanApprovalStatus.PENDING,
            TrainingPlanApprovalStatus.REJECTED,
        ],
        verbose=False,
    )
    if not non_approved_training_plans:
        logger.warning(
            "All training_plans have been approved or no training plan has been registered... aborting"
        )
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        + "\tdate_last_action "
        + str(m["date_last_action"])
        for m in non_approved_training_plans
    ]
    # Menu entries are numbered from 1
    msg = "Select the training plan to approve:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(non_approved_training_plans):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            training_plan_id = non_approved_training_plans[opt_idx]["training_plan_id"]
            return approve(training_plan_id)
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
delete_all_database
delete_all_database(dataset_manager)
Deletes all datasets from the node's database.
Does not modify the dataset's files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dataset_manager | DatasetManager | Object for managing the dataset | required |
Source code in fedbiomed/node/cli_utils/_database.py
def delete_all_database(dataset_manager: DatasetManager) -> None:
    """Deletes all datasets from the node's database.

    Only the database entries are removed, one by one, by their `dataset_id`;
    the dataset's files are not modified.

    Args:
        dataset_manager: Object for managing the dataset
    """
    registered = dataset_manager.list_my_datasets(verbose=False)

    if registered:
        for entry in registered:
            dataset_id = entry["dataset_id"]
            dataset_manager.dataset_table.delete_by_id(dataset_id)
            logger.info("Dataset removed for dataset_id:" + str(dataset_id))
        return

    # Nothing is registered: only report it
    logger.warning("No dataset to delete")
delete_database
delete_database(dataset_manager, interactive=True)
Removes one or more datasets from the node's database.
Does not modify the dataset's files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
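interactive | bool | If `True`, interactively queries (repeatedly) from the command line for a dataset to delete; if `False`, deletes the MNIST dataset if it exists in the database. | True |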
Source code in fedbiomed/node/cli_utils/_database.py
def delete_database(dataset_manager: DatasetManager, interactive: bool = True) -> None:
    """Removes one or more datasets from the node's database.

    Does not modify the dataset's files.

    Args:
        dataset_manager: Object for managing the dataset
        interactive:

            - if `True` interactively queries (repeatedly) from the command line
                for a dataset to delete
            - if `False` delete MNIST dataset if it exists in the database
    """
    my_data = dataset_manager.list_my_datasets(verbose=False)
    if not my_data:
        logger.warning("No dataset to delete")
        return

    def remove(dataset_id):
        dataset_manager.dataset_table.delete_by_id(dataset_id)
        logger.info("Dataset removed. Here your available datasets")
        dataset_manager.list_my_datasets()

    if interactive is not True:
        # Non-interactive mode: delete the first dataset named "MNIST".
        # This runs once, outside the retry loop below: retrying without user
        # input would repeat the same failure forever.
        d_id = next(
            (ds["dataset_id"] for ds in my_data if ds["name"] == "MNIST"), None
        )
        if not d_id:
            logger.warning("No matching dataset to delete")
            return
        remove(d_id)
        return

    options = [d["name"] for d in my_data]
    # Menu entries are numbered from 1
    msg = "Select the dataset to delete:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(my_data):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            d_id = my_data[opt_idx]["dataset_id"]
            if not d_id:
                logger.warning("No matching dataset to delete")
                return
            remove(d_id)
            return
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
delete_training_plan
delete_training_plan(tp_security_manager, id=None)
Deletes an authorized training plan in the database interactively from the CLI.
Does not modify or delete training plan file.
Deletes only registered and requested training_plans. For default training plans, files should be removed directly from the file system.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
id | Optional[str] | ID of the training plan that will be removed. | None |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def delete_training_plan(
    tp_security_manager: TrainingPlanSecurityManager, id: Optional[str] = None
) -> None:
    """Deletes an authorized training plan in the database interactively from the CLI.

    Does not modify or delete training plan file.

    Deletes only registered and requested training_plans. For default training plans, files
    should be removed directly from the file system.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: ID of the training plan that will be removed. When given, the plan
            is deleted without prompting.
    """

    def delete(training_plan_id):
        # Delete training plan
        tp_security_manager.delete_training_plan(training_plan_id)
        logger.info("Training plan has been removed. Here your other training plans")
        tp_security_manager.list_training_plans(verbose=True)

    # Keep only the training plans whose type allows deletion from the CLI
    deletable_types = [
        TrainingPlanStatus.REGISTERED.value,
        TrainingPlanStatus.REQUESTED.value,
    ]
    training_plans = [
        m
        for m in tp_security_manager.list_training_plans(verbose=False)
        if m["training_plan_type"] in deletable_types
    ]
    if not training_plans:
        logger.warning("No training plans to delete")
        return

    # NOTE: a provided id is passed straight to the manager; it is not checked
    # against the filtered list above.
    if id:
        return delete(id)

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t Training plan type "
        + m["training_plan_type"]
        + "\tTraining plan status "
        + m["training_plan_status"]
        for m in training_plans
    ]
    # Menu entries are numbered from 1
    msg = "Select the training plan to delete:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            training_plan_id = training_plans[opt_idx]["training_plan_id"]
            if not training_plan_id:
                logger.warning("No matching training plan to delete")
                return
            return delete(training_plan_id)
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
register_training_plan
register_training_plan(tp_security_manager)
Registers an authorized training plan in the database interactively through the CLI.
Does not modify training plan file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def register_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Registers an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    Args:
        tp_security_manager: Object for managing the training plan approval

    Raises:
        SystemExit: With status 1 when the registration fails with an
            `AssertionError`.
    """
    print("Welcome to the Fed-BioMed CLI data manager")
    name = input("Please enter a training plan name: ")
    description = input("Please enter a description for the training plan: ")

    # Allow files saved as txt
    path = validated_path_input(type="txt")

    # Register training plan
    try:
        tp_security_manager.register_training_plan(
            name=name, description=description, path=path
        )
    except AssertionError as e:
        # Report through the GUI message box when one is available, otherwise
        # as a warning.
        if messagebox is not None:
            messagebox.showwarning(title="Warning", message=str(e))
        else:
            warnings.warn(f"[ERROR]: {e}", stacklevel=1)
        # The `exit` builtin is only injected by the `site` module and is meant
        # for the interactive shell; raise SystemExit directly instead.
        raise SystemExit(1)

    print("\nGreat! Take a look at your data:")
    tp_security_manager.list_training_plans(verbose=True)
reject_training_plan
reject_training_plan(tp_security_manager, id=None, notes=None)
Rejects a given training plan that has either Pending or Approved status
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
id | Optional[str] | Training plan ID | None |
notes | Optional[str] | Comment about rejection reason | None |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def reject_training_plan(
    tp_security_manager: TrainingPlanSecurityManager,
    id: Optional[str] = None,
    notes: Optional[str] = None,
) -> None:
    """Rejects a given training plan that has either Pending or Approved status

    When no `id` is given, the Approved/Pending training plans are listed and
    the user is prompted (repeatedly, until a valid choice is made) to pick one
    and to type a rejection note.

    Args:
        tp_security_manager: Object for managing the training plan approval
        id: Training plan ID. When given, the plan is rejected without prompting.
        notes: Comment about rejection reason
    """

    def reject(training_plan_id, notes):
        tp_security_manager.reject_training_plan(training_plan_id, notes)
        logger.info(
            f"Training plan {training_plan_id} has been rejected. "
            "Researchers can not train training plan "
            "on this node anymore"
        )

    approved_training_plans = tp_security_manager.list_training_plans(
        select_status=[
            TrainingPlanApprovalStatus.APPROVED,
            TrainingPlanApprovalStatus.PENDING,
        ],
        verbose=False,
    )
    # This check runs before the `id` shortcut: with nothing to reject, even a
    # provided id is not processed.
    if not approved_training_plans:
        logger.warning(
            "All training plans have already been rejected or no training plan has been registered... aborting"
        )
        return

    if id:
        reject(id, notes)
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        + "\tTraining plan Type "
        + m["training_plan_type"]
        for m in approved_training_plans
    ]
    # Menu entries are numbered from 1
    msg = "Select the training plan to reject (this will prevent Researcher to run training plan on Node):\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(approved_training_plans):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            training_plan_id = approved_training_plans[opt_idx]["training_plan_id"]
            notes = input(
                "Please give a note to explain why training plan has been rejected: \n"
            )
            reject(training_plan_id, notes)
            return
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
update_training_plan
update_training_plan(tp_security_manager)
Updates an authorized training plan in the database interactively through the CLI.
Does not modify training plan file.
User can either choose different training plan file (different path) to update training plan or same training plan file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def update_training_plan(tp_security_manager: TrainingPlanSecurityManager):
    """Updates an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    User can either choose different training plan file (different path)
    to update training plan or same training plan file.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    # Select only registered training plans to update
    training_plans = [
        m
        for m in tp_security_manager.list_training_plans(verbose=False)
        if m["training_plan_type"] == TrainingPlanStatus.REGISTERED.value
    ]
    if not training_plans:
        logger.warning("No registered training plans has been found to update")
        return

    options = [
        m["name"] + "\t Training plan ID " + m["training_plan_id"]
        for m in training_plans
    ]
    # Menu entries are numbered from 1
    msg = "Select the training plan to update:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # Get the selection
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            training_plan_id = training_plans[opt_idx]["training_plan_id"]
            if not training_plan_id:
                logger.warning("No matching training plan to update")
                return

            # Get the new file or same file. User can provide same training plan file
            # with updated content or new training plan file.
            path = validated_path_input(type="txt")

            # Update training plan through training plan manager
            tp_security_manager.update_training_plan_hash(training_plan_id, path)
            logger.info("Training plan has been updated. Here all your training plans")
            tp_security_manager.list_training_plans(verbose=True)
            return
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
view_training_plan
view_training_plan(tp_security_manager)
Views source code for a training plan in the database
If training plan cannot be displayed to the logger, then abort.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
tp_security_manager | TrainingPlanSecurityManager | Object for managing the training plan approval | required |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def view_training_plan(tp_security_manager: TrainingPlanSecurityManager) -> None:
    """Views source code for a training plan in the database

    The user is prompted (repeatedly, until a valid choice is made) to pick a
    training plan; its source is then syntax-highlighted and written to the
    logger. If training plan cannot be displayed to the logger, then abort.

    Args:
        tp_security_manager: Object for managing the training plan approval
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    if not training_plans:
        logger.warning("No training plan has been registered... aborting")
        return

    options = [
        m["name"]
        + "\t Training plan ID "
        + m["training_plan_id"]
        + "\t training plan status "
        + m["training_plan_status"]
        for m in training_plans
    ]
    # Menu entries are numbered from 1
    msg = "Select the training plan to view:\n"
    msg += "\n".join([f"{i}) {d}" for i, d in enumerate(options, 1)])
    msg += "\n\nDon't try to modify the training plan with this viewer, modifications will be dropped."
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit bounds check instead of `assert`: asserts are stripped
            # under `python -O`, and then an input of 0 or a negative number
            # would silently index from the end of the list.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f"option out of range: {opt_idx + 1}")
            training_plan_name = training_plans[opt_idx]["name"]
        except (ValueError, IndexError, AssertionError):
            logger.error("Invalid option. Please, try again.")
        else:
            break

    # TODO: more robust (when refactor whole CLI)
    # - check `training_plan` though it should never be None, as we just checked for it
    # - check after file copy though it should work
    # - etc.
    training_plan = tp_security_manager.get_training_plan_by_name(
        training_plan_name
    )

    # Any failure while highlighting or logging is reported, not raised
    try:
        training_plan_source = highlight(
            training_plan["training_plan"], PythonLexer(), Terminal256Formatter()
        )
        logger.info(f"\n\n{training_plan_source}\n\n")
    except Exception as err:
        logger.critical(
            f"Cannot display training plan via logger. Aborting. Error message is: {err}"
        )
    return