to simplify imports from fedbiomed.node.cli_utils
Attributes
dataset_manager module-attribute
dataset_manager = DatasetManager()
tp_security_manager module-attribute
tp_security_manager = TrainingPlanSecurityManager()
Functions
add_database
add_database(interactive=True, path=None, name=None, tags=None, description=None, data_type=None, dataset_parameters=None)
Adds a dataset to the node database.
Also queries interactively the user on the command line (and file browser) for dataset parameters if needed.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
interactive | bool | Whether to query interactively for dataset parameters even if they are all passed as arguments. Defaults to `True`. | True |
path | str | Path to the dataset. | None |
name | str | Keyword for the dataset. | None |
tags | str | Comma separated list of tags for the dataset. | None |
description | str | Human readable description of the dataset. | None |
data_type | str | Keyword for the data type of the dataset. | None |
Source code in fedbiomed/node/cli_utils/_database.py
def add_database(interactive: bool = True,
                 path: str = None,
                 name: str = None,
                 tags: str = None,
                 description: str = None,
                 data_type: str = None,
                 dataset_parameters: dict = None):
    """Adds a dataset to the node database.

    Also queries interactively the user on the command line (and file browser)
    for dataset parameters if needed.

    If `interactive` is false but at least one of `path`, `name`, `tags`,
    `description` or `data_type` is missing, the data type falls back to
    `'default'` (MNIST) and the name, tags and description are overwritten
    with the MNIST defaults; only `path` is kept as passed.

    Args:
        interactive: Whether to query interactively for dataset parameters
            even if they are all passed as arguments. Defaults to `True`.
        path: Path to the dataset.
        name: Keyword for the dataset.
        tags: Comma separated list of tags for the dataset.
        description: Human readable description of the dataset.
        data_type: Keyword for the data type of the dataset.
        dataset_parameters: Optional dataset parameters forwarded to
            `dataset_manager.add_database` (and to the medical-folder CLI
            helper). A falsy value is normalized to `None`.

    Raises:
        SystemExit: with code 1 if the dataset manager rejects the dataset
            with an `AssertionError` or `FedbiomedDatasetManagerError`.
    """
    # Local import: `sys.exit` replaces the `site`-provided `exit` helper,
    # which is intended for the interactive shell only.
    import sys

    dataset_parameters = dataset_parameters or None
    data_loading_plan = None

    # if all args are provided, just try to load the data
    # if not, ask the user more informations
    missing_argument = any(arg is None for arg in (path, name, tags, description, data_type))
    if interactive or missing_argument:

        print('Welcome to the Fed-BioMed CLI data manager')

        if interactive is True:
            data_type = validated_data_type_input()
        else:
            data_type = 'default'

        if data_type == 'default':
            tags = ['#MNIST', "#dataset"]
            if interactive is True:
                # Block until the user explicitly confirms with 'y'
                while input(f'MNIST will be added with tags {tags} [y/N]').lower() != 'y':
                    pass
                # Only ask for a path interactively; in non-interactive mode
                # the `path` argument given by the caller is kept.
                path = validated_path_input(data_type)
            name = 'MNIST'
            description = 'MNIST database'

        elif data_type == 'mednist':
            tags = ['#MEDNIST', "#dataset"]
            if interactive is True:
                while input(f'MEDNIST will be added with tags {tags} [y/N]').lower() != 'y':
                    pass
                path = validated_path_input(data_type)
            name = 'MEDNIST'
            description = 'MEDNIST dataset'

        else:
            name = input('Name of the database: ')

            tags = input('Tags (separate them by comma and no spaces): ')
            tags = tags.replace(' ', '').split(',')

            description = input('Description: ')

            if data_type == 'medical-folder':
                path, dataset_parameters, data_loading_plan = add_medical_folder_dataset_from_cli(interactive,
                                                                                                  dataset_parameters,
                                                                                                  data_loading_plan)
            elif data_type == 'flamby':
                path = None  # flamby datasets are not identified by their path

                # Select the type of dataset (fed_ixi, fed_heart, etc...)
                available_flamby_datasets = discover_flamby_datasets()
                msg = "Please select the FLamby dataset that you're configuring:\n"
                msg += "\n".join([f"\t{i}) {val}" for i, val in available_flamby_datasets.items()])
                msg += "\nselect: "
                keep_asking_for_input = True
                while keep_asking_for_input:
                    try:
                        flamby_dataset_index = input(msg)
                        flamby_dataset_index = int(flamby_dataset_index)
                        # check that the user inserted a number within the valid range
                        if flamby_dataset_index in available_flamby_datasets.keys():
                            keep_asking_for_input = False
                        else:
                            warnings.warn(f"Please pick a number in the range {list(available_flamby_datasets.keys())}")
                    except ValueError:
                        warnings.warn('Please input a numeric value (integer)')

                # Select the center id
                module = import_module(f".{available_flamby_datasets[flamby_dataset_index]}", package='flamby.datasets')
                n_centers = module.NUM_CLIENTS
                keep_asking_for_input = True
                while keep_asking_for_input:
                    try:
                        center_id = int(input(f"Give a center id between 0 and {str(n_centers-1)}: "))
                        if 0 <= center_id < n_centers:
                            keep_asking_for_input = False
                        else:
                            # Tell the user why the prompt is repeated
                            warnings.warn(f'Please input a numeric value (integer) between 0 and {str(n_centers-1)}')
                    except ValueError:
                        warnings.warn(f'Please input a numeric value (integer) between 0 and {str(n_centers-1)}')

                # Build the DataLoadingPlan with the selected dataset type and center id
                data_loading_plan = DataLoadingPlan()
                metadata_dlb = FlambyDatasetMetadataBlock()
                metadata_dlb.metadata = {
                    'flamby_dataset_name': available_flamby_datasets[flamby_dataset_index],
                    'flamby_center_id': center_id
                }
                data_loading_plan[FlambyLoadingBlockTypes.FLAMBY_DATASET_METADATA] = metadata_dlb
            else:
                path = validated_path_input(data_type)

            # if a data loading plan was specified, we now ask for the description
            if interactive and data_loading_plan is not None:
                keep_asking_for_input = True
                while keep_asking_for_input:
                    desc = input('Please input a short name/description for your data loading plan:')
                    if len(desc) < 4:
                        print('Description must be at least 4 characters long.')
                    else:
                        keep_asking_for_input = False
                data_loading_plan.desc = desc

    else:
        # all data have been provided at call
        # check few things

        # transform a string with coma(s) as a string list
        tags = str(tags).split(',')

        name = str(name)
        description = str(description)

        data_type = str(data_type).lower()
        if data_type not in ['csv', 'default', 'mednist', 'images', 'medical-folder']:
            data_type = 'default'

        if not os.path.exists(path):
            # f-string rather than `+` so a non-str path-like cannot raise TypeError here
            logger.critical(f"provided path does not exists: {path}")

    logger.info(f"PATH VALUE {path}")

    # Add database
    try:
        dataset_manager.add_database(name=name,
                                     tags=tags,
                                     data_type=data_type,
                                     description=description,
                                     path=path,
                                     dataset_parameters=dataset_parameters,
                                     data_loading_plan=data_loading_plan)
    except (AssertionError, FedbiomedDatasetManagerError) as e:
        if interactive is True:
            try:
                tkinter.messagebox.showwarning(title='Warning', message=str(e))
            except ModuleNotFoundError:
                warnings.warn(f'[ERROR]: {e}')
        else:
            warnings.warn(f'[ERROR]: {e}')
        sys.exit(1)
    except FedbiomedDatasetError as err:
        warnings.warn(f'[ERROR]: {err} ... Aborting'
                      "\nHint: are you sure you have selected the correct index in Demographic file?")
        # The dataset was not added: do not announce success below.
        return

    print('\nGreat! Take a look at your data:')
    dataset_manager.list_my_data(verbose=True)
approve_training_plan
approve_training_plan(id=None, *, sort_by_date=True)
Approves a given training plan that has either Pending or Rejected status
Parameters:
Name | Type | Description | Default |
---|---|---|---|
sort_by_date | bool | whether to sort by last modification date. Defaults to True. | True |
id | Optional[str] | unique id of the training plan to be approved. Providing an id will trigger non-interactive approval. | None |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def approve_training_plan(id: Optional[str] = None, *, sort_by_date: bool = True):
    """Approves a given training plan that has either Pending or Rejected status

    Args:
        id: unique id of the training plan to be approved. Providing an id will
            trigger non-interactive approval.
        sort_by_date: whether to sort by last modification date. Defaults to True.
    """
    def approve(training_plan_id):
        tp_security_manager.approve_training_plan(training_plan_id)
        logger.info(f"Training plan {training_plan_id} has been approved. "
                    "Researchers can now train the Training Plan "
                    "on this node.")

    # If id is already provided
    if id:
        return approve(id)

    sort_by = 'date_modified' if sort_by_date else None

    non_approved_training_plans = tp_security_manager.list_training_plans(
        sort_by=sort_by,
        select_status=[TrainingPlanApprovalStatus.PENDING,
                       TrainingPlanApprovalStatus.REJECTED],
        verbose=False)
    if not non_approved_training_plans:
        logger.warning("All training_plans have been approved or no training plan has been registered... aborting")
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] + '\tdate_last_action ' +
               str(m['date_last_action']) for m in non_approved_training_plans]

    msg = "Select the training plan to approve:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(non_approved_training_plans):
                raise ValueError(f'option {opt_idx + 1} is out of range')
            training_plan_id = non_approved_training_plans[opt_idx]['training_plan_id']
            return approve(training_plan_id)
        # AssertionError is still listed because the approval call runs inside this `try`
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
delete_all_database
delete_all_database()
Deletes all datasets from the node's database.
Does not modify the dataset's files.
Source code in fedbiomed/node/cli_utils/_database.py
def delete_all_database():
    """Remove every dataset entry from the node's database.

    The dataset files themselves are not modified.
    """
    registered = dataset_manager.list_my_data(verbose=False)

    if registered:
        # Drop each entry in turn and log the id that was removed
        for entry in registered:
            dataset_id = entry['dataset_id']
            dataset_manager.remove_database(dataset_id)
            logger.info(f'Dataset removed for dataset_id:{dataset_id!s}')
    else:
        logger.warning('No dataset to delete')
delete_database
delete_database(interactive=True)
Removes one or more dataset from the node's database.
Does not modify the dataset's files.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
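interactive | bool | If `True`, interactively queries from the command line for a dataset to delete; if `False`, deletes the MNIST dataset if it exists in the database. | True |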
Source code in fedbiomed/node/cli_utils/_database.py
def delete_database(interactive: bool = True):
    """Removes a dataset from the node's database.

    Does not modify the dataset's files.

    Args:
        interactive:

            - if `True` interactively queries from the command line
                for a dataset to delete (re-prompting on invalid input)
            - otherwise delete the MNIST dataset if it exists in the database
    """
    my_data = dataset_manager.list_my_data(verbose=False)
    if not my_data:
        logger.warning('No dataset to delete')
        return

    def remove(dataset_id):
        # Remove the entry, then show what is left
        dataset_manager.remove_database(dataset_id)
        logger.info('Dataset removed. Here your available datasets')
        dataset_manager.list_my_data()

    if interactive is not True:
        # Non-interactive mode: no prompt and no retry loop. `None` default
        # avoids an unbound `d_id` when no dataset is named 'MNIST'.
        d_id = next((ds['dataset_id'] for ds in my_data if ds['name'] == 'MNIST'), None)
        if not d_id:
            logger.warning('No matching dataset to delete')
            return
        remove(d_id)
        return

    options = [d['name'] for d in my_data]
    msg = "Select the dataset to delete:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(my_data):
                raise ValueError(f'option {opt_idx + 1} is out of range')

            d_id = my_data[opt_idx]['dataset_id']
            if not d_id:
                logger.warning('No matching dataset to delete')
                return
            remove(d_id)
            return
        # AssertionError is still listed because the removal call runs inside this `try`
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
delete_training_plan
delete_training_plan(id=None)
Deletes an authorized training plan in the database interactively from the CLI.
Does not modify or delete training plan file.
Deletes only registered and requested training_plans. For default training plans, files should be removed directly from the file system.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
id | Optional[str] | ID of the training plan that will be removed. If not provided, the training plan is selected interactively. | None |
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def delete_training_plan(id: Optional[str] = None):
    """Deletes an authorized training plan in the database interactively from the CLI.

    Does not modify or delete training plan file.

    Deletes only registered and requested training_plans. For default training plans, files
    should be removed directly from the file system.

    Args:
        id: ID of the training plan that will be removed. If not provided,
            the training plan to delete is selected interactively.
    """
    def delete(training_plan_id):
        # Delete training plan
        tp_security_manager.delete_training_plan(training_plan_id)
        logger.info('Training plan has been removed. Here your other training plans')
        tp_security_manager.list_training_plans(verbose=True)

    training_plans = tp_security_manager.list_training_plans(verbose=False)
    # Only registered and requested training plans can be deleted from here
    deletable_types = [TrainingPlanStatus.REGISTERED.value,
                       TrainingPlanStatus.REQUESTED.value]
    training_plans = [m for m in training_plans if m['training_plan_type'] in deletable_types]
    if not training_plans:
        logger.warning('No training plans to delete')
        return

    if id:
        return delete(id)

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t Training plan type ' +
               m['training_plan_type'] + '\tTraining plan status ' + m['training_plan_status'] for m in training_plans]
    msg = "Select the training plan to delete:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f'option {opt_idx + 1} is out of range')
            training_plan_id = training_plans[opt_idx]['training_plan_id']

            if not training_plan_id:
                logger.warning('No matching training plan to delete')
                return
            return delete(training_plan_id)
        # AssertionError is still listed because the deletion call runs inside this `try`
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
register_training_plan
register_training_plan()
Registers an authorized training plan in the database interactively through the CLI.
Does not modify training plan file.
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def register_training_plan():
    """Registers an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    Prompts for a name, a description and a file path, then registers the
    training plan and lists the registered training plans.

    Raises:
        SystemExit: with code 1 if registration fails with an `AssertionError`.
    """
    # Local import: `sys.exit` replaces the `site`-provided `exit` helper,
    # which is intended for the interactive shell only.
    import sys

    print('Welcome to the Fed-BioMed CLI data manager')
    name = input('Please enter a training plan name: ')
    description = input('Please enter a description for the training plan: ')

    # Allow files saved as txt
    path = validated_path_input(type="txt")

    # Register training plan
    try:
        tp_security_manager.register_training_plan(name=name,
                                                   description=description,
                                                   path=path)
    except AssertionError as e:
        # Show the failure in a dialog, falling back to a console warning
        # on `ModuleNotFoundError`
        try:
            tkinter.messagebox.showwarning(title='Warning', message=str(e))
        except ModuleNotFoundError:
            warnings.warn(f'[ERROR]: {e}')
        sys.exit(1)

    print('\nGreat! Take a look at your data:')
    tp_security_manager.list_training_plans(verbose=True)
reject_training_plan
reject_training_plan(id=None, notes=None)
Rejects a given training plan that has either Pending or Approved status
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def reject_training_plan(id: Optional[str] = None, notes: Optional[str] = None):
    """Rejects a given training plan that has either Pending or Approved status

    Args:
        id: ID of the training plan to reject. Providing an id triggers
            non-interactive rejection.
        notes: Note explaining why the training plan is rejected. Only used
            when `id` is provided; in interactive mode the note is asked
            on the command line instead.
    """
    def reject(training_plan_id, notes):
        tp_security_manager.reject_training_plan(training_plan_id, notes)
        logger.info(f"Training plan {training_plan_id} has been rejected. "
                    "Researchers can not train training plan "
                    "on this node anymore")

    approved_training_plans = tp_security_manager.list_training_plans(
        select_status=[TrainingPlanApprovalStatus.APPROVED,
                       TrainingPlanApprovalStatus.PENDING],
        verbose=False)

    if not approved_training_plans:
        logger.warning("All training plans have already been rejected or no training plan has been registered... aborting")
        return

    if id:
        reject(id, notes)
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] + '\tTraining plan Type ' + m['training_plan_type'] for m in approved_training_plans]

    msg = "Select the training plan to reject (this will prevent Researcher to run training plan on Node):\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(approved_training_plans):
                raise ValueError(f'option {opt_idx + 1} is out of range')
            training_plan_id = approved_training_plans[opt_idx]['training_plan_id']
            notes = input("Please give a note to explain why training plan has been rejected: \n")
            reject(training_plan_id, notes)
            return
        # AssertionError is still listed because the rejection call runs inside this `try`
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
update_training_plan
update_training_plan()
Updates an authorized training plan in the database interactively through the CLI.
Does not modify training plan file.
User can either choose different training plan file (different path) to update training plan or same training plan file.
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def update_training_plan():
    """Updates an authorized training plan in the database interactively through the CLI.

    Does not modify training plan file.

    User can either choose different training plan file (different path)
    to update training plan or same training plan file.
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)

    # Select only registered training plan to update
    training_plans = [m for m in training_plans if m['training_plan_type'] == TrainingPlanStatus.REGISTERED.value]
    if not training_plans:
        logger.warning('No registered training plans has been found to update')
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] for m in training_plans]
    msg = "Select the training plan to update:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\nSelect: "

    while True:
        try:
            # Get the selection
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f'option {opt_idx + 1} is out of range')
            training_plan_id = training_plans[opt_idx]['training_plan_id']

            if not training_plan_id:
                logger.warning('No matching training plan to update')
                return

            # Get the new file or same file. User can provide same training plan file
            # with updated content or new training plan file.
            path = validated_path_input(type="txt")

            # Update training plan through training plan manager
            tp_security_manager.update_training_plan_hash(training_plan_id, path)

            logger.info('Training plan has been updated. Here all your training plans')
            tp_security_manager.list_training_plans(verbose=True)

            return
        # AssertionError is still listed because the path prompt and the
        # update call run inside this `try`
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
view_training_plan
view_training_plan()
Views source code for a training plan in the database
If `environ[EDITOR]` is set, this editor is used to view a copy of the training plan source code, so that any modifications are not saved to the training plan.
If `environ[EDITOR]` is unset or cannot be used to view the training plan, the training plan is printed to the logger.
If training plan cannot be displayed to the logger, then abort.
Source code in fedbiomed/node/cli_utils/_training_plan_management.py
def view_training_plan():
    """Views source code for a training plan in the database

    Asks the user to select one of the training plans known to the node, then
    prints its syntax-highlighted source code to the logger.

    If training plan cannot be displayed to the logger, a critical message is
    logged and the function returns.

    NOTE(review): earlier documentation described viewing a copy of the
    training plan in `environ[EDITOR]`; this function body does not invoke
    any editor.
    """
    training_plans = tp_security_manager.list_training_plans(verbose=False)
    if not training_plans:
        logger.warning("No training plan has been registered... aborting")
        return

    options = [m['name'] + '\t Training plan ID ' + m['training_plan_id'] + '\t training plan status ' +
               m['training_plan_status'] for m in training_plans]

    msg = "Select the training plan to view:\n"
    msg += "\n".join([f'{i}) {d}' for i, d in enumerate(options, 1)])
    msg += "\n\nDon't try to modify the training plan with this viewer, modifications will be dropped."
    msg += "\nSelect: "

    while True:
        try:
            opt_idx = int(input(msg)) - 1
            # Explicit range check instead of `assert`: assertions are removed
            # under `python -O`, which would let "0" silently select index -1.
            if not 0 <= opt_idx < len(training_plans):
                raise ValueError(f'option {opt_idx + 1} is out of range')
            training_plan_name = training_plans[opt_idx]['name']
        except (ValueError, IndexError, AssertionError):
            logger.error('Invalid option. Please, try again.')
            continue

        # TODO: more robust (when refactor whole CLI)
        # - check `training_plan` though it should never be None, as we just checked for it
        # - check after file copy though it should work
        # - etc.
        training_plan = tp_security_manager.get_training_plan_by_name(training_plan_name)

        try:
            training_plan_source = highlight(training_plan["training_plan"], PythonLexer(), Terminal256Formatter())
            logger.info(f'\n\n{training_plan_source}\n\n')
        except Exception as err:
            logger.critical(f'Cannot display training plan via logger. Aborting. Error message is: {err}')

        # Displayed (or failed to display): either way we are done
        return