Skip to content

Commit

Permalink
Add completely custom output directory option
Browse files Browse the repository at this point in the history
  • Loading branch information
thompsonmj committed Oct 4, 2024
1 parent 0f4ef69 commit fad9b67
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 51 deletions.
14 changes: 13 additions & 1 deletion src/wing_segmenter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,16 @@ def main():
default='area',
help='Interpolation method to use when resizing. For upscaling, "lanczos4" is recommended.')

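# NOTE: the two standalone arguments below are intentionally left commented out; the same options are registered in the mutually exclusive group that follows, and adding them twice would duplicate them.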
# segment_parser.add_argument('--outputs-base-dir', default=None, help='Base path to store outputs for multiple runs.')
# segment_parser.add_argument('--custom-output-dir', default=None, help='Fully custom directory to store all output files for a run.')

# Output options within mutually exclusive group
output_group = segment_parser.add_mutually_exclusive_group()
output_group.add_argument('--outputs-base-dir', default=None, help='Base path to store outputs.')
output_group.add_argument('--custom-output-dir', default=None, help='Fully custom directory to store all output files.')

# General processing options
segment_parser.add_argument('--output-dir', default=None, help='Base path to store outputs.')
segment_parser.add_argument('--sam-model', default='facebook/sam-vit-base',
help='SAM model to use (e.g., facebook/sam-vit-base)')
segment_parser.add_argument('--yolo-model', default='imageomics/butterfly_segmentation_yolo_v8:yolov8m_shear_10.0_scale_0.5_translate_0.1_fliplr_0.0_best.pt',
Expand Down Expand Up @@ -94,6 +102,10 @@ def main():
if (args.remove_background or args.remove_bg_full) and not args.crop_by_class:
parser.error('--remove-background and --remove-bg-full require --crop-by-class to be set.')

# Defensive check: reject using --custom-output-dir together with --outputs-base-dir (the mutually exclusive argparse group should already prevent this combination)
if args.custom_output_dir and args.outputs_base_dir:
parser.error('Cannot specify both --outputs-base-dir and --custom-output-dir. Choose one.')

# Execute the subcommand
if args.command == 'segment':
from wing_segmenter.segmenter import Segmenter
Expand Down
13 changes: 7 additions & 6 deletions src/wing_segmenter/metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
import json
import os

def generate_uuid(parameters, namespace_uuid):
NAMESPACE_UUID = uuid.UUID('00000000-0000-0000-0000-000000000000')

def generate_uuid(parameters):
"""
Generates a UUID based on the hash of the parameters.
Generates a UUID based on the provided parameters and a fixed namespace UUID.
Parameters:
- parameters (dict): The parameters to hash.
- namespace_uuid (uuid.UUID): The namespace UUID.
Returns:
- uuid.UUID: The generated UUID.
"""
# Convert parameters to a sorted JSON string to ensure consistency
param_str = json.dumps(parameters, sort_keys=True).encode('utf-8')
return uuid.uuid5(namespace_uuid, hashlib.sha256(param_str).hexdigest())
# Serialize parameters to a sorted JSON string to ensure consistency
param_str = json.dumps(parameters, sort_keys=True)
return uuid.uuid5(NAMESPACE_UUID, param_str)

def get_dataset_hash(dataset_path):
"""
Expand Down
35 changes: 21 additions & 14 deletions src/wing_segmenter/path_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import uuid
from wing_segmenter.constants import CLASSES

def setup_paths(segmenter):
Expand All @@ -9,43 +8,51 @@ def setup_paths(segmenter):
Parameters:
- segmenter: The Segmenter instance.
"""
# Create output directory
dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\'))
output_dir_name = f"{dataset_name}_{segmenter.run_uuid}"
if segmenter.output_base_dir:
# Determine output directory
if segmenter.custom_output_dir:
# User has specified a fully custom output directory
segmenter.output_dir = os.path.abspath(segmenter.custom_output_dir)
elif segmenter.output_base_dir:
# Create output directory based on base directory and run UUID
dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\'))
output_dir_name = f"{dataset_name}_{segmenter.run_uuid}"
segmenter.output_dir = os.path.join(segmenter.output_base_dir, output_dir_name)
else:
# Default: Create output directory in the parent of the dataset path
dataset_name = os.path.basename(segmenter.dataset_path.rstrip('/\\'))
output_dir_name = f"{dataset_name}_{segmenter.run_uuid}"
segmenter.output_dir = os.path.join(os.path.dirname(segmenter.dataset_path), output_dir_name)

# Create the root output directory
os.makedirs(segmenter.output_dir, exist_ok=True)

# Metadata file path
segmenter.metadata_path = os.path.join(segmenter.output_dir, 'metadata.json')

# Prepare output subdirectories
# Define subdirectories
segmenter.resized_dir = os.path.join(segmenter.output_dir, 'resized')
segmenter.masks_dir = os.path.join(segmenter.output_dir, 'masks')
segmenter.viz_dir = os.path.join(segmenter.output_dir, 'seg_viz')
segmenter.crops_dir = os.path.join(segmenter.output_dir, 'crops')
segmenter.logs_dir = os.path.join(segmenter.output_dir, 'logs')

# Create subdirectories
os.makedirs(segmenter.resized_dir, exist_ok=True)
os.makedirs(segmenter.masks_dir, exist_ok=True)
os.makedirs(segmenter.viz_dir, exist_ok=True)
os.makedirs(segmenter.crops_dir, exist_ok=True)
os.makedirs(segmenter.logs_dir, exist_ok=True)


# Create directory for images with background removed if required
# Create directories for background removal if needed
if segmenter.remove_background and segmenter.save_intermediates:
# Create directory for background-removed cropped images
segmenter.crops_bkgd_removed_dir = os.path.join(segmenter.output_dir, 'crops_bkgd_removed')
os.makedirs(segmenter.crops_bkgd_removed_dir, exist_ok=True)

# Create directory for background-removed full images
segmenter.full_bkgd_removed_dir = os.path.join(segmenter.output_dir, 'full_bkgd_removed')
os.makedirs(segmenter.crops_bkgd_removed_dir, exist_ok=True)
os.makedirs(segmenter.full_bkgd_removed_dir, exist_ok=True)

# If resizing is enabled, create resized directories
# Additional directories based on resizing options
if segmenter.save_intermediates and segmenter.size:
os.makedirs(segmenter.resized_dir, exist_ok=True)

# Mask CSV path
# CSV file for segmentation info
segmenter.mask_csv = os.path.join(segmenter.output_dir, 'segmentation.csv')
27 changes: 17 additions & 10 deletions src/wing_segmenter/run_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from rich.table import Table
from rich.console import Console

def scan_runs(dataset_path, output_base_dir=None):
def scan_runs(dataset_path, output_base_dir=None, custom_output_dir=None):
dataset_path = os.path.abspath(dataset_path)

if not os.path.exists(dataset_path):
Expand All @@ -14,22 +14,29 @@ def scan_runs(dataset_path, output_base_dir=None):

# Determine the base directory to search for runs
dataset_name = os.path.basename(dataset_path.rstrip('/\\'))
if output_base_dir:
output_dir = os.path.abspath(output_base_dir)

if custom_output_dir:
# If a custom output directory is provided, scan only that directory without expecting specific naming
run_dirs = [custom_output_dir] if os.path.exists(custom_output_dir) else []
else:
output_dir = os.path.dirname(dataset_path)
if output_base_dir:
output_dir = os.path.abspath(output_base_dir)
else:
output_dir = os.path.dirname(dataset_path)

# Search for run directories in the specified output directory
pattern = f"{output_dir}/{dataset_name}_*"
run_dirs = glob.glob(pattern)
# Search for run directories in the specified output directory
pattern = f"{output_dir}/{dataset_name}_*"
run_dirs = glob.glob(pattern)

console = Console()

if not run_dirs:
if output_base_dir:
if custom_output_dir:
console.print(f"[red]No processing runs found in the custom output directory '{custom_output_dir}' for dataset '{dataset_name}'.[/red]")
elif output_base_dir:
console.print(f"[red]No processing runs found in '{output_dir}' for dataset '{dataset_name}'.[/red]")
else:
console.print(f"[red]No processing runs found for dataset '{dataset_name}' in default location ('{output_dir}'). If you saved results in a custom location, please use the `--output-dir` flag for the `scan-runs` command.[/red]")
console.print(f"[red]No processing runs found for dataset '{dataset_name}' in default location ('{output_dir}'). If you saved results in a custom location, please use the `--custom-output-dir` flag for the `scan-runs` command.[/red]")
return

console.print(f"[bold green]Found {len(run_dirs)} processing runs for dataset '{dataset_name}':[/bold green]\n")
Expand Down Expand Up @@ -81,7 +88,7 @@ def scan_runs(dataset_path, output_base_dir=None):
errors = str(metadata['run_status'].get('errors', 'None'))

# Truncate run UUID to save table space
run_uuid_prefix = os.path.basename(run_dir).split('_')[-1][:8]
run_uuid_prefix = os.path.basename(run_dir).split('_')[-1][:8] if not custom_output_dir else "CustomDir"

# Add row to the table
table.add_row(
Expand Down
32 changes: 12 additions & 20 deletions src/wing_segmenter/segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
import time
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

Expand Down Expand Up @@ -36,21 +35,12 @@ def __init__(self, config):
self.remove_bg_full = config.remove_bg_full
self.background_color = config.background_color if (self.remove_background or self.remove_bg_full) else None
self.segmentation_info = []
self.output_base_dir = os.path.abspath(config.output_dir) if config.output_dir else None

# Define your namespace UUID based on a string
self.NAMESPACE_UUID = uuid.uuid5(uuid.NAMESPACE_DNS, 'Imageomics Wing Segmentation')

# Handle resizing dimensions
if self.size:
if len(self.size) == 1:
self.width = self.height = self.size[0]
elif len(self.size) == 2:
self.width, self.height = self.size
else:
raise ValueError("Invalid size argument. Size must have either one or two values.")
else:
self.width = self.height = None # if no resizing use None
self.output_base_dir = os.path.abspath(config.outputs_base_dir) if config.outputs_base_dir else None
self.custom_output_dir = os.path.abspath(config.custom_output_dir) if config.custom_output_dir else None

# Ensure that only one of outputs_base_dir or custom_output_dir is used
if self.output_base_dir and self.custom_output_dir:
raise ValueError("Cannot specify both --outputs-base-dir and --custom-output-dir.")

# Prepare parameters for hashing
self.parameters_for_hash = {
Expand All @@ -59,8 +49,8 @@ def __init__(self, config):
'yolo_model_name': self.config.yolo_model,
'resize_mode': self.resize_mode,
'size': self.size if self.size else None,
'width': self.width,
'height': self.height,
'width': self.size[0] if self.size and len(self.size) == 1 else (self.size[0] if self.size else None),
'height': self.size[1] if self.size and len(self.size) == 2 else (self.size[0] if self.size and len(self.size) == 1 else None),
'padding_color': self.padding_color if self.resize_mode == 'pad' else None,
'interpolation': self.interpolation if self.size else None,
'save_intermediates': self.save_intermediates,
Expand All @@ -72,14 +62,16 @@ def __init__(self, config):
}

# Generate UUID based on parameters
self.run_uuid = generate_uuid(self.parameters_for_hash, self.NAMESPACE_UUID)
self.run_uuid = generate_uuid(self.parameters_for_hash)

# Setup output paths
setup_paths(self)

# Load models
self.yolo_model, self.sam_model, self.sam_processor = load_models(self.config, self.device)

def process_dataset(self):
start_time = time.time()
start_time = time.time()
errors_occurred = False

# Prepare image paths
Expand Down

0 comments on commit fad9b67

Please sign in to comment.