Source code for detectree.pixel_response

"""Build pixel binary (tree/non-tree) responses."""

import glob
from os import path

import numpy as np
import rasterio as rio

from . import settings, utils

__all__ = ["PixelResponseBuilder"]


class NonBinaryResponseError(Exception):
    pass


[docs] class PixelResponseBuilder: """Customize how pixel responses (tree/non-tree labels) are computed.""" # It is really not necessary to use a class for this, but we do so for the sake of # API consistency with the `pixel_features` module
[docs] def __init__(self, *, tree_val=None, nontree_val=None): """ Initialize the pixel response builder. See the `background <https://bit.ly/2KlCICO>`_ example notebook for more details. Parameters ---------- tree_val : int, optional The value that designates tree pixels in the response images. nontree_val : int, optional The value that designates non-tree pixels in the response images. """ if tree_val is None: tree_val = settings.RESPONSE_TREE_VAL self.tree_val = tree_val if nontree_val is None: nontree_val = settings.RESPONSE_NONTREE_VAL self.nontree_val = nontree_val
def build_response_from_arr(self, img_binary): """ Build response (flat) array from a binary (tree/non-tree) image array. Parameters ---------- img_binary : numpy ndarray Two-dimensional binary (tree-non-tree) image array. Returns ------- responses : numpy ndarray Array with the pixel responses. """ response_arr = img_binary.copy() response_arr[response_arr == self.tree_val] = 1 response_arr[response_arr == self.nontree_val] = 0 # check that the provided `img_binary` is actually binary, i.e., consists only # of `tree_val` and `nontree_val` values if ((response_arr != 0) & (response_arr != 1)).any(): raise NonBinaryResponseError return response_arr.flatten() def build_response_from_filepath(self, img_filepath): """ Build response (flat) array from a binary (tree/non-tree) image file. Parameters ---------- img_filepath : str, file object or pathlib.Path object Path to a file, URI, file object opened in binary ('rb') mode, or a Path object representing the binary (tree/non-tree) image to be transformed into the response. The value will be passed to `rasterio.open`. Returns ------- responses : numpy ndarray Array with the pixel responses. """ with rio.open(img_filepath) as src: img_binary = src.read(1) try: return self.build_response_from_arr(img_binary) except NonBinaryResponseError: raise ValueError( f"The response mask {img_filepath} must consist of only {self.tree_val}" f" (tree) and {self.nontree_val} (non-tree) pixel values" )
[docs] def build_response( self, *, split_df=None, response_img_dir=None, response_img_filepaths=None, img_filename_pattern=None, method=None, img_cluster=None, ): """ Build the pixel response (flat) array for a list of images. Parameters ---------- split_df : pd.DataFrame Data frame with the train/test split. response_img_dir : str representing path to a directory, optional Path to the directory where the response images are located. Required if providing `split_df`. Otherwise `response_img_dir` might either be ignored if providing `response_img_filepaths`, or be used as the directory where the images whose filename matches `img_filename_pattern` are to be located. response_img_filepaths : list of image file paths, optional List of images to be transformed into the response. Alternatively, the same information can be provided by means of the `img_dir` and `img_filename_pattern` keyword arguments. Ignored if providing `split_df`. img_filename_pattern : str representing a file-name pattern, optional Filename pattern to be matched in order to obtain the list of images. If no value is provided, the value set in `settings.IMG_FILENAME_PATTERN` is used. Ignored if `split_df` or `img_filepaths` is provided. method : {'cluster-I', 'cluster-II'}, optional Method used in the train/test split. img_cluster : int, optional The label of the cluster of images. Only used if `method` is 'cluster-II'. Returns ------- responses : numpy ndarray Array with the pixel responses. """ if split_df is not None: if response_img_dir is None: raise ValueError( "If `split_df` is provided, `response_img_dir` must also be" " provided" ) if method is None: if "img_cluster" in split_df: method = "cluster-II" else: method = "cluster-I" if method == "cluster-I": img_filepaths = split_df[split_df["train"]]["img_filepath"] else: if img_cluster is None: raise ValueError( "If `method` is 'cluster-II', `img_cluster` must be provided" ) img_filepaths = utils.get_img_filepaths(split_df, img_cluster, True) response_img_filepaths = img_filepaths.apply( lambda filepath: path.join(response_img_dir, path.basename(filepath)) ) else: if response_img_filepaths is None: if img_filename_pattern is None: img_filename_pattern = settings.IMG_FILENAME_PATTERN if response_img_dir is None: raise ValueError( "Either `split_df`, `response_img_filepaths` or " "`response_img_dir` must be provided" ) response_img_filepaths = glob.glob( path.join(response_img_dir, img_filename_pattern) ) # TODO: `response_img_filepaths` # no need for dask here values = [] for response_img_filepath in response_img_filepaths: values.append(self.build_response_from_filepath(response_img_filepath)) return np.vstack(values).flatten()