Source code for phenotypic.phenotypic_cli

"""
PhenoTypic CLI
==============

A command-line interface for executing PhenoTypic ImagePipelines on directories of images.
This script allows for parallel processing of images, saving both measurements and
visual quality control overlays.

Usage:
    python -m phenotypic PIPELINE_JSON INPUT_DIR OUTPUT_DIR [OPTIONS]

Example:
    python -m phenotypic my_pipeline.json ./raw_images ./results --n-jobs 4
"""

import sys
import click
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pathlib import Path
from joblib import Parallel, delayed
from typing import Optional, Type, Dict, Any, List

import phenotypic
from phenotypic import Image, GridImage, ImagePipeline
from phenotypic.tools.constants_ import IO

# Set non-interactive backend for headless execution
matplotlib.use("Agg")


[docs] def process_single_image( image_path: Path, meas_dir: Path, overlay_dir: Path, pipeline: ImagePipeline, image_cls: Type[Image], read_kwargs: Dict[str, Any], ) -> Optional[pd.DataFrame]: """ Processes a single image of a microbe colony on solid media agar by applying an image processing pipeline, generating measurements, and creating a graphical overlay output. This function is highly versatile, allowing the user to control how images are read, analyzed, and stored based on provided arguments. Args: image_path (Path): Path to the image file representing the microbe colony on agar. Adjusting this variable changes which colony image is analyzed. meas_dir (Path): Directory where the measurement results (CSV) will be saved. The choice of directory affects the organization of analysis results and resultant data pipeline workflows. overlay_dir (Path): Directory for saving visual overlays. This allows inspection of how the overlay corresponds to the processed regions in the image. Choose a directory accessible to tools used for review. pipeline (ImagePipeline): A sequence of image processing steps applied to the input image. The pipeline heavily influences the analysis' sensitivity and accuracy in extracting colony features like size, shape, or density. image_cls (Type[Image]): Class responsible for reading and processing the input image. Changing this affects how the image format is handled (e.g., handling raw images produced in specific microscopy settings). read_kwargs (Dict[str, Any]): Parameters passed when reading the image (e.g., color modes, compression). Modifying these parameters tailors how images are interpreted and may change the fidelity of image data used in downstream analyses. Returns: Optional[pd.DataFrame]: A DataFrame containing microbiological measurements for the processed image, such as colony area, perimeter, and optical density. If processing fails, returns None. Adjustments in inputs or pipeline steps directly affect the resulting metrics. Raises: This function handles all internal exceptions and reports processing failures with user-friendly messages, allowing review of errors without interrupting a batch process. """ try: # Create specific output path for this image's results # We use the image stem for naming image_stem = image_path.stem # Load image # We need to handle rawpy_params if needed, but for CLI we'll stick to basics for now image = image_cls.imread(image_path, **read_kwargs) # Execute pipeline # We use inplace=True to save memory, though pipeline operations might copy internally meas = pipeline.apply_and_measure(image, inplace=True) # Save measurements for this individual image meas_path = meas_dir / f"{image_stem}.csv" meas.to_csv(meas_path, index=False) # Generate and save overlay # We suppress the plot display since we are in a CLI fig, ax = image.show_overlay() overlay_path = overlay_dir / f"{image_stem}.png" fig.savefig(overlay_path, bbox_inches="tight") plt.close(fig) return meas except Exception as e: click.echo(f"Error processing {image_path.name}: {str(e)}", err=True) return None
@click.command() @click.argument( "pipeline_json", type=click.Path(exists=True, dir_okay=False, path_type=Path) ) @click.argument( "input_dir", type=click.Path(exists=True, file_okay=False, path_type=Path) ) @click.argument("output_dir", type=click.Path(path_type=Path)) @click.option( "--image-type", type=click.Choice(["Image", "GridImage"], case_sensitive=False), default="GridImage", help="Type of image object to instantiate.", ) @click.option( "--nrows", type=int, default=8, show_default=True, help="Number of rows for GridImage.", ) @click.option( "--ncols", type=int, default=12, show_default=True, help="Number of columns for GridImage.", ) @click.option( "--bit-depth", type=int, default=None, help="Bit depth of input images (8 or 16)." ) @click.option( "--n-jobs", type=int, default=-1, show_default=True, help="Number of parallel jobs. -1 uses all available cores.", ) def main( pipeline_json: Path, input_dir: Path, output_dir: Path, image_type: str, nrows: int, ncols: int, bit_depth: Optional[int], n_jobs: int, ): """ Execute a PhenoTypic pipeline on a directory of images. PIPELINE_JSON: Path to the exported pipeline configuration file. INPUT_DIR: Directory containing the images to process. OUTPUT_DIR: Directory where results (CSVs and overlays) will be saved. """ # Setup output_dir.mkdir(parents=True, exist_ok=True) meas_dir = output_dir / "measurements" meas_dir.mkdir(parents=True, exist_ok=True) overlay_dir = output_dir / "overlays" overlay_dir.mkdir(parents=True, exist_ok=True) click.echo(f"Loading pipeline from {pipeline_json}...") try: pipeline = ImagePipeline.from_json(pipeline_json) except Exception as e: click.echo(f"Failed to load pipeline: {e}", err=True) sys.exit(1) # Determine Image Class and Arguments if image_type == "GridImage": image_cls = GridImage read_kwargs = {"nrows": nrows, "ncols": ncols} else: image_cls = Image read_kwargs = {} if bit_depth: read_kwargs["bit_depth"] = bit_depth # Find images extensions = IO.ACCEPTED_FILE_EXTENSIONS + IO.RAW_FILE_EXTENSIONS image_paths = [ p for p in input_dir.iterdir() if p.is_file() and p.suffix.lower() in extensions ] if not image_paths: click.echo(f"No valid images found in {input_dir}", err=True) sys.exit(1) click.echo( f"Found {len(image_paths)} images. Starting processing with {n_jobs} jobs..." ) # Parallel Execution # We use joblib to parallelize the processing results = Parallel(n_jobs=n_jobs)( delayed(process_single_image)( path, meas_dir, overlay_dir, pipeline, image_cls, read_kwargs ) for path in image_paths ) # Aggregate Results valid_results = [res for res in results if res is not None] if valid_results: click.echo( f"Successfully processed {len(valid_results)}/{len(image_paths)} images." ) master_df = pd.concat(valid_results, axis=0, ignore_index=True) master_path = output_dir / "master_measurements.csv" master_df.to_csv(master_path, index=False) click.echo(f"Master measurements saved to {master_path}") else: click.echo("No images were successfully processed.", err=True) sys.exit(1) if __name__ == "__main__": main()