diff --git a/src/wing_segmenter/cli.py b/src/wing_segmenter/cli.py index 65bd325..a87b5c6 100644 --- a/src/wing_segmenter/cli.py +++ b/src/wing_segmenter/cli.py @@ -39,8 +39,16 @@ def main(): default='area', help='Interpolation method to use when resizing. For upscaling, "lanczos4" is recommended.') + # **Remove the following two lines to avoid duplication** + # segment_parser.add_argument('--outputs-base-dir', default=None, help='Base path to store outputs for multiple runs.') + # segment_parser.add_argument('--custom-output-dir', default=None, help='Fully custom directory to store all output files for a run.') + + # Output options within mutually exclusive group + output_group = segment_parser.add_mutually_exclusive_group() + output_group.add_argument('--outputs-base-dir', default=None, help='Base path to store outputs.') + output_group.add_argument('--custom-output-dir', default=None, help='Fully custom directory to store all output files.') + # General processing options - segment_parser.add_argument('--output-dir', default=None, help='Base path to store outputs.') segment_parser.add_argument('--sam-model', default='facebook/sam-vit-base', help='SAM model to use (e.g., facebook/sam-vit-base)') segment_parser.add_argument('--yolo-model', default='imageomics/butterfly_segmentation_yolo_v8:yolov8m_shear_10.0_scale_0.5_translate_0.1_fliplr_0.0_best.pt', @@ -94,6 +102,10 @@ def main(): if (args.remove_background or args.remove_bg_full) and not args.crop_by_class: parser.error('--remove-background and --remove-bg-full require --crop-by-class to be set.') + # Ensure that if --custom-output-dir is set, --outputs-base-dir is not used + if args.custom_output_dir and args.outputs_base_dir: + parser.error('Cannot specify both --outputs-base-dir and --custom-output-dir. 
# Fixed namespace (the nil UUID) under which run UUIDs are derived by default.
NAMESPACE_UUID = uuid.UUID('00000000-0000-0000-0000-000000000000')


def generate_uuid(parameters, namespace_uuid=None):
    """
    Generates a deterministic (version 5) UUID from a dictionary of parameters.

    The parameters are serialized to a JSON string with sorted keys, so two
    dictionaries with the same content yield the same UUID regardless of the
    order in which their keys were inserted.

    Parameters:
    - parameters (dict): The parameters to hash. Must be JSON-serializable.
    - namespace_uuid (uuid.UUID, optional): Namespace in which the UUID is
      derived. Defaults to the module-level NAMESPACE_UUID, so existing
      one-argument calls keep producing the same UUIDs.

    Returns:
    - uuid.UUID: The generated UUID.

    Raises:
    - TypeError: If `parameters` contains a value that json.dumps cannot serialize.
    """
    # Resolve the default at call time so the module constant stays the single
    # source of truth for the namespace.
    if namespace_uuid is None:
        namespace_uuid = NAMESPACE_UUID

    # sort_keys makes the serialized form independent of dict insertion order.
    param_str = json.dumps(parameters, sort_keys=True)
    return uuid.uuid5(namespace_uuid, param_str)
""" - # Create output directory - dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\')) - output_dir_name = f"{dataset_name}_{segmenter.run_uuid}" - if segmenter.output_base_dir: + # Determine output directory + if segmenter.custom_output_dir: + # User has specified a fully custom output directory + segmenter.output_dir = os.path.abspath(segmenter.custom_output_dir) + elif segmenter.output_base_dir: + # Create output directory based on base directory and run UUID + dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\')) + output_dir_name = f"{dataset_name}_{segmenter.run_uuid}" segmenter.output_dir = os.path.join(segmenter.output_base_dir, output_dir_name) else: + # Default: Create output directory in the parent of the dataset path + dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\')) + output_dir_name = f"{dataset_name}_{segmenter.run_uuid}" segmenter.output_dir = os.path.join(os.path.dirname(segmenter.dataset_path), output_dir_name) + + # Create the root output directory os.makedirs(segmenter.output_dir, exist_ok=True) # Metadata file path segmenter.metadata_path = os.path.join(segmenter.output_dir, 'metadata.json') - # Prepare output subdirectories + # Define subdirectories segmenter.resized_dir = os.path.join(segmenter.output_dir, 'resized') segmenter.masks_dir = os.path.join(segmenter.output_dir, 'masks') segmenter.viz_dir = os.path.join(segmenter.output_dir, 'seg_viz') segmenter.crops_dir = os.path.join(segmenter.output_dir, 'crops') segmenter.logs_dir = os.path.join(segmenter.output_dir, 'logs') + + # Create subdirectories + os.makedirs(segmenter.resized_dir, exist_ok=True) os.makedirs(segmenter.masks_dir, exist_ok=True) os.makedirs(segmenter.viz_dir, exist_ok=True) os.makedirs(segmenter.crops_dir, exist_ok=True) os.makedirs(segmenter.logs_dir, exist_ok=True) - - # Create directory for images with background removed if required + # Create directories for background removal if needed if 
segmenter.remove_background and segmenter.save_intermediates: - # Create directory for background-removed cropped images segmenter.crops_bkgd_removed_dir = os.path.join(segmenter.output_dir, 'crops_bkgd_removed') - os.makedirs(segmenter.crops_bkgd_removed_dir, exist_ok=True) - - # Create directory for background-removed full images segmenter.full_bkgd_removed_dir = os.path.join(segmenter.output_dir, 'full_bkgd_removed') + os.makedirs(segmenter.crops_bkgd_removed_dir, exist_ok=True) os.makedirs(segmenter.full_bkgd_removed_dir, exist_ok=True) - # If resizing is enabled, create resized directories + # Additional directories based on resizing options if segmenter.save_intermediates and segmenter.size: os.makedirs(segmenter.resized_dir, exist_ok=True) - # Mask CSV path + # CSV file for segmentation info segmenter.mask_csv = os.path.join(segmenter.output_dir, 'segmentation.csv') diff --git a/src/wing_segmenter/run_scanner.py b/src/wing_segmenter/run_scanner.py index 10f8b54..8026b98 100644 --- a/src/wing_segmenter/run_scanner.py +++ b/src/wing_segmenter/run_scanner.py @@ -4,7 +4,7 @@ from rich.table import Table from rich.console import Console -def scan_runs(dataset_path, output_base_dir=None): +def scan_runs(dataset_path, output_base_dir=None, custom_output_dir=None): dataset_path = os.path.abspath(dataset_path) if not os.path.exists(dataset_path): @@ -14,22 +14,29 @@ def scan_runs(dataset_path, output_base_dir=None): # Determine the base directory to search for runs dataset_name = os.path.basename(dataset_path.rstrip('/\\')) - if output_base_dir: - output_dir = os.path.abspath(output_base_dir) + + if custom_output_dir: + # If a custom output directory is provided, scan only that directory without expecting specific naming + run_dirs = [custom_output_dir] if os.path.exists(custom_output_dir) else [] else: - output_dir = os.path.dirname(dataset_path) + if output_base_dir: + output_dir = os.path.abspath(output_base_dir) + else: + output_dir = 
os.path.dirname(dataset_path) - # Search for run directories in the specified output directory - pattern = f"{output_dir}/{dataset_name}_*" - run_dirs = glob.glob(pattern) + # Search for run directories in the specified output directory + pattern = f"{output_dir}/{dataset_name}_*" + run_dirs = glob.glob(pattern) console = Console() if not run_dirs: - if output_base_dir: + if custom_output_dir: + console.print(f"[red]No processing runs found in the custom output directory '{custom_output_dir}' for dataset '{dataset_name}'.[/red]") + elif output_base_dir: console.print(f"[red]No processing runs found in '{output_dir}' for dataset '{dataset_name}'.[/red]") else: - console.print(f"[red]No processing runs found for dataset '{dataset_name}' in default location ('{output_dir}'). If you saved results in a custom location, please use the `--output-dir` flag for the `scan-runs` command.[/red]") + console.print(f"[red]No processing runs found for dataset '{dataset_name}' in default location ('{output_dir}'). 
If you saved results in a custom location, please use the `--custom-output-dir` flag for the `scan-runs` command.[/red]") return console.print(f"[bold green]Found {len(run_dirs)} processing runs for dataset '{dataset_name}':[/bold green]\n") @@ -81,7 +88,7 @@ def scan_runs(dataset_path, output_base_dir=None): errors = str(metadata['run_status'].get('errors', 'None')) # Truncate run UUID to save table space - run_uuid_prefix = os.path.basename(run_dir).split('_')[-1][:8] + run_uuid_prefix = os.path.basename(run_dir).split('_')[-1][:8] if not custom_output_dir else "CustomDir" # Add row to the table table.add_row( diff --git a/src/wing_segmenter/segmenter.py b/src/wing_segmenter/segmenter.py index fd3d5df..0a2d5b5 100644 --- a/src/wing_segmenter/segmenter.py +++ b/src/wing_segmenter/segmenter.py @@ -2,7 +2,6 @@ import json import logging import time -import uuid from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm @@ -36,21 +35,12 @@ def __init__(self, config): self.remove_bg_full = config.remove_bg_full self.background_color = config.background_color if (self.remove_background or self.remove_bg_full) else None self.segmentation_info = [] - self.output_base_dir = os.path.abspath(config.output_dir) if config.output_dir else None - - # Define your namespace UUID based on a string - self.NAMESPACE_UUID = uuid.uuid5(uuid.NAMESPACE_DNS, 'Imageomics Wing Segmentation') - - # Handle resizing dimensions - if self.size: - if len(self.size) == 1: - self.width = self.height = self.size[0] - elif len(self.size) == 2: - self.width, self.height = self.size - else: - raise ValueError("Invalid size argument. 
Size must have either one or two values.") - else: - self.width = self.height = None # if no resizing use None + self.output_base_dir = os.path.abspath(config.outputs_base_dir) if config.outputs_base_dir else None + self.custom_output_dir = os.path.abspath(config.custom_output_dir) if config.custom_output_dir else None + + # Ensure that only one of outputs_base_dir or custom_output_dir is used + if self.output_base_dir and self.custom_output_dir: + raise ValueError("Cannot specify both --outputs-base-dir and --custom-output-dir.") # Prepare parameters for hashing self.parameters_for_hash = { @@ -59,8 +49,8 @@ def __init__(self, config): 'yolo_model_name': self.config.yolo_model, 'resize_mode': self.resize_mode, 'size': self.size if self.size else None, - 'width': self.width, - 'height': self.height, + 'width': self.size[0] if self.size and len(self.size) == 1 else (self.size[0] if self.size else None), + 'height': self.size[1] if self.size and len(self.size) == 2 else (self.size[0] if self.size and len(self.size) == 1 else None), 'padding_color': self.padding_color if self.resize_mode == 'pad' else None, 'interpolation': self.interpolation if self.size else None, 'save_intermediates': self.save_intermediates, @@ -72,14 +62,16 @@ def __init__(self, config): } # Generate UUID based on parameters - self.run_uuid = generate_uuid(self.parameters_for_hash, self.NAMESPACE_UUID) + self.run_uuid = generate_uuid(self.parameters_for_hash) + # Setup output paths setup_paths(self) + # Load models self.yolo_model, self.sam_model, self.sam_processor = load_models(self.config, self.device) def process_dataset(self): - start_time = time.time() + start_time = time.time() errors_occurred = False # Prepare image paths