# Source code for phenotypic.grid._auto_grid_finder

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from phenotypic import Image

import pandas as pd
import numpy as np
from scipy.optimize import minimize_scalar
from functools import partial

from phenotypic.abc_ import GridFinder
from phenotypic.tools.constants_ import BBOX, GRID


class AutoGridFinder(GridFinder):
    """
    Automatically adjusts and processes grid configurations for images based on
    parameters like row and column counts, tolerance, and iteration constraints.

    This class extends `GridFinder`. For each axis it searches for the padding
    around the detected objects that minimizes the mean squared error between
    the per-row (or per-column) object midpoints and the midpoints of evenly
    spaced bins, and then derives the grid edges from that padding.

    Attributes:
        __iter_limit (int): Internal limit for the maximum number of iterations.
        nrows (int): Number of rows for the grid structure.
        ncols (int): Number of columns for the grid structure.
        tol (float): Tolerance level to assess convergence.
        max_iter (int): Maximum allowable iterations, capped by the internal limit.
    """

    # Kept as an int (not 1e5) so that `max_iter` really is an integer when it
    # falls back to this limit.
    __iter_limit = 100_000

    def __init__(
        self,
        nrows: int = 8,
        ncols: int = 12,
        tol: float = 0.01,
        max_iter: int | None = None,
    ) -> None:
        """
        Configure the grid dimensions and the solver's convergence settings.

        Args:
            nrows (int): Number of rows in the grid.
            ncols (int): Number of columns in the grid.
            tol (float): Absolute tolerance on the padding value, passed to the
                scalar minimizer as its `xatol` option.
            max_iter (int | None): Maximum number of solver iterations. A falsy
                value (`None` or `0`) selects the internal iteration limit;
                larger values are capped at that limit.
        """
        super().__init__(nrows=nrows, ncols=ncols)
        self.tol: float = tol
        limit = int(self.__iter_limit)
        self.max_iter: int = min(int(max_iter), limit) if max_iter else limit

    def _operate(self, image: Image) -> pd.DataFrame:
        """
        Compute the grid layout for the objects detected in an image.

        Row edges and column edges are each computed with an optimized padding
        (see `get_row_edges` and `get_col_edges`), then handed to the base
        class helper `_get_grid_info`, which assembles the result table.

        Args:
            image (Image): The input image to be analyzed and processed.

        Returns:
            pd.DataFrame: The table returned by the base class
                `_get_grid_info` for the computed row and column edges.
        """
        row_edges = self.get_row_edges(image)
        col_edges = self.get_col_edges(image)

        # Use base class helper to assemble complete grid info
        return super()._get_grid_info(
            image=image, row_edges=row_edges, col_edges=col_edges
        )

    def _find_padding_midpoint_error(
        self, pad_sz, image, axis, row_pad=0, col_pad=0
    ) -> float:
        """
        Calculate the mean squared error between object midpoints and grid bin
        midpoints for a candidate padding.

        Args:
            pad_sz: Padding size to test for the specified axis.
            image: Image object containing objects to be gridded.
            axis: 0 for rows, 1 for columns.
            row_pad: Row padding to use while optimizing columns (axis=1).
            col_pad: Column padding to use while optimizing rows (axis=0).

        Returns:
            float: Mean squared error between object and bin midpoints.

        Raises:
            ValueError: If `axis` is neither 0 nor 1.
        """
        # Pick the axis-specific settings once; everything after is shared.
        if axis == 0:
            row_padding, column_padding = pad_sz, col_pad
            center_key, group_key = str(BBOX.CENTER_RR), str(GRID.ROW_NUM)
            min_key, max_key = str(BBOX.MIN_RR), str(BBOX.MAX_RR)
            n_bins = self.nrows
        elif axis == 1:
            row_padding, column_padding = row_pad, pad_sz
            center_key, group_key = str(BBOX.CENTER_CC), str(GRID.COL_NUM)
            min_key, max_key = str(BBOX.MIN_CC), str(BBOX.MAX_CC)
            n_bins = self.ncols
        else:
            raise ValueError(f"Invalid axis value: {axis}")

        obj_info = image.objects.info(include_metadata=False)

        # Build the grid that results from the candidate padding.
        row_edges = self._get_row_edges(
            image=image, row_padding=row_padding, info_table=obj_info
        )
        col_edges = self._get_col_edges(
            image=image, column_padding=column_padding, info_table=obj_info
        )
        current_grid_info = super()._get_grid_info(
            image=image, row_edges=row_edges, col_edges=col_edges
        )

        # Mean object center within each grid row/column.
        current_obj_midpoints = (
            current_grid_info.loc[:, [center_key, group_key]]
            .groupby(group_key, observed=False)[center_key]
            .mean()
            .values
        )

        # Evenly spaced bins over the padded extent of the objects.
        bin_edges = np.histogram_bin_edges(
            a=current_grid_info.loc[:, center_key].values,
            bins=n_bins,
            range=(
                current_grid_info.loc[:, min_key].min() - pad_sz,
                current_grid_info.loc[:, max_key].max() + pad_sz,
            ),
        )
        bin_edges.sort()

        # (larger_point - smaller_point) / 2 + smaller_point, for every bin.
        # NOTE(review): `//` floors the half-width, so the midpoint is only
        # exact when the half-width is integral - confirm this is intended.
        larger_edges = bin_edges[1:]
        smaller_edges = bin_edges[:-1]
        bin_midpoint = (larger_edges - smaller_edges) // 2 + smaller_edges

        return ((current_obj_midpoints - bin_midpoint) ** 2).sum() / len(
            current_obj_midpoints
        )

    def _get_optimal_row_pad(self, image: Image) -> int:
        """
        Find the row padding that minimizes the midpoint error along axis 0.

        The search range is `[0, max_pad]`, where `max_pad` is the smaller of
        the space above the topmost object and the space below the bottommost
        object (from the bounding-box row extremes and `image.shape[0]`),
        clipped to zero when negative.

        Args:
            image (Image): The image object containing detected objects.

        Returns:
            int: The optimal row padding.
        """
        obj_info = image.objects.info(include_metadata=False)
        min_rr = obj_info.loc[:, str(BBOX.MIN_RR)].min()
        max_rr = obj_info.loc[:, str(BBOX.MAX_RR)].max()

        max_row_pad_size = min(min_rr - 1, abs(image.shape[0] - max_rr - 1))
        # Clip in case pad size is negative
        max_row_pad_size = 0 if max_row_pad_size < 0 else max_row_pad_size

        partial_row_pad_finder = partial(
            self._find_padding_midpoint_error,
            image=image,
            axis=0,
            row_pad=0,
            col_pad=0,
        )
        return int(
            self._apply_solver(
                partial_row_pad_finder, max_value=max_row_pad_size, min_value=0
            )
        )

    def _get_row_edges(
        self, image: Image, row_padding: int, info_table: pd.DataFrame
    ) -> np.ndarray:
        """
        Determine the row edges of the grid from object positions and padding.

        The padded extent of the objects along the row axis is rounded, clipped
        to `[0, image.shape[0] - 1]`, and split into `self.nrows` equal-width
        bins; the resulting edges are rounded to integers.

        Args:
            image (Image): The image whose height bounds the edges.
            row_padding (int): Padding added on both sides of the objects' row
                extent before binning.
            info_table (pd.DataFrame): Object table with the minimum, maximum
                and center row coordinate columns.

        Returns:
            np.ndarray: Integer row edges sorted in ascending order.
        """
        lower_row_bound = round(
            info_table.loc[:, str(BBOX.MIN_RR)].min() - row_padding
        )
        upper_row_bound = round(
            info_table.loc[:, str(BBOX.MAX_RR)].max() + row_padding
        )
        obj_row_range = np.clip(
            a=[lower_row_bound, upper_row_bound],
            a_min=0,
            a_max=image.shape[0] - 1,
        )
        row_edges = np.histogram_bin_edges(
            a=info_table.loc[:, str(BBOX.CENTER_RR)],
            bins=self.nrows,
            range=tuple(obj_row_range),
        )
        np.round(a=row_edges, out=row_edges)
        row_edges.sort()
        return row_edges.astype(int)

    def get_row_edges(self, image: Image) -> np.ndarray:
        """
        Return the row edges of the grid for the given image.

        The optimal row padding is computed first and then used, together with
        the image's object table, to derive the edges.

        Args:
            image (Image): The input image whose row edges are needed.

        Returns:
            np.ndarray: Integer row edges sorted in ascending order.
        """
        optimal_row_padding = self._get_optimal_row_pad(image=image)
        return self._get_row_edges(
            image=image,
            row_padding=optimal_row_padding,
            info_table=image.objects.info(include_metadata=False),
        )

    def _get_optimal_col_pad(self, image: Image) -> int:
        """
        Find the column padding that minimizes the midpoint error along axis 1.

        The search range is `[0, max_pad]`, where `max_pad` is the smaller of
        the space left of the leftmost object and the space right of the
        rightmost object (from the bounding-box column extremes and
        `image.shape[1]`), clipped to zero when negative.

        Args:
            image (Image): The image object containing detected objects.

        Returns:
            int: The optimal column padding.
        """
        obj_info = image.objects.info(include_metadata=False)
        min_cc = obj_info.loc[:, str(BBOX.MIN_CC)].min()
        max_cc = obj_info.loc[:, str(BBOX.MAX_CC)].max()

        max_col_pad_size = min(min_cc - 1, abs(image.shape[1] - max_cc - 1))
        # Clip in case pad size is negative
        max_col_pad_size = 0 if max_col_pad_size < 0 else max_col_pad_size

        partial_col_pad_finder = partial(
            self._find_padding_midpoint_error,
            image=image,
            axis=1,
            row_pad=0,
            col_pad=0,
        )
        # Coerced to int for parity with `_get_optimal_row_pad`.
        return int(
            self._apply_solver(
                partial_col_pad_finder, max_value=max_col_pad_size, min_value=0
            )
        )

    def _get_col_edges(
        self, image: Image, column_padding: int, info_table: pd.DataFrame
    ) -> np.ndarray:
        """
        Determine the column edges of the grid from object positions and padding.

        The padded extent of the objects along the column axis is rounded,
        clipped to `[0, image.shape[1] - 1]`, and split into `self.ncols`
        equal-width bins; the resulting edges are rounded to integers.

        Args:
            image (Image): The image whose width bounds the edges.
            column_padding (int): Padding added on both sides of the objects'
                column extent before binning.
            info_table (pd.DataFrame): Object table with the minimum, maximum
                and center column coordinate columns.

        Returns:
            np.ndarray: Integer column edges sorted in ascending order.
        """
        lower_col_bound = round(
            info_table.loc[:, str(BBOX.MIN_CC)].min() - column_padding
        )
        upper_col_bound = round(
            info_table.loc[:, str(BBOX.MAX_CC)].max() + column_padding
        )
        obj_col_range = np.clip(
            a=[lower_col_bound, upper_col_bound],
            a_min=0,
            a_max=image.shape[1] - 1,
        )
        col_edges = np.histogram_bin_edges(
            a=info_table.loc[:, str(BBOX.CENTER_CC)],
            bins=self.ncols,
            range=tuple(obj_col_range),
        )
        np.round(a=col_edges, out=col_edges)
        col_edges.sort()
        return col_edges.astype(int)

    def get_col_edges(self, image: Image) -> np.ndarray:
        """
        Return the column edges of the grid for the given image.

        The optimal column padding is computed first and then used, together
        with the image's object table, to derive the edges.

        Args:
            image (Image): The input image whose column edges are needed.

        Returns:
            np.ndarray: Integer column edges sorted in ascending order.
        """
        optimal_col_padding = self._get_optimal_col_pad(image=image)
        return self._get_col_edges(
            image=image,
            column_padding=optimal_col_padding,
            info_table=image.objects.info(include_metadata=False),
        )

    def _apply_solver(self, partial_cost_func, max_value, min_value=0) -> int:
        """
        Return the padding value that minimizes the mean squared difference
        between the object midpoints and grid midpoints.

        Args:
            partial_cost_func: Callable taking a single padding value and
                returning the error to minimize.
            max_value: Upper bound of the padding search interval.
            min_value: Lower bound of the padding search interval.

        Returns:
            int: The minimizing padding rounded to the nearest integer, or 0
                when `max_value` is 0 (nothing to search).
        """
        if max_value == 0:
            return 0

        # `bounds` and the `xatol` option belong to the bounded solver, so the
        # method is named explicitly instead of relying on SciPy's default.
        result = minimize_scalar(
            partial_cost_func,
            bounds=(min_value, max_value),
            method="bounded",
            options={
                "maxiter": int(self.max_iter) if self.max_iter else 1000,
                "xatol": self.tol,
            },
        )
        return int(round(result.x))
# Reuse the `_operate` docstring as the docstring of `measure`
# (`measure` is not defined in this class; presumably inherited from GridFinder - confirm).
AutoGridFinder.measure.__doc__ = AutoGridFinder._operate.__doc__
# Replace the class docstring with the result of `GRID.append_rst_to_doc`
# (presumably appends RST describing the GRID fields - verify against GRID).
AutoGridFinder.__doc__ = GRID.append_rst_to_doc(AutoGridFinder.__doc__)